Task:
This function searches given null terminated string pStr
by given subset of
regular expression pMatch
.
Return value is matched string appeared in pStr
.
If nothing has matched, return value is empty string.
The subset of regular expression grammar is defined as follows:
- A string is set of 8-bit characters. Beware that it is not 7-bit.
- The metacharacter
\
is an escape character, that means any single letter appeared next used as is. - The characters in squared brackets metacharacters
[
and]
is a sequence of literal characters or metacharacters. Immediately after]
, another metacharacters in curly braces{
num}
must be followed. The num is a sequence of digits which specifies the number of times the previous literal character or metacharacter sequence in squared brackets must be repeated. e.g. "abc[def]{2}ghi
" matches "abcdefdefghi
". - The squared brackets metacharacters can be nested.
e.g. "
abc[def[ghi]{3}jkl]{2}mno
" matches "abcdefghighighijkldefghighighijklmno
". - In case of matching pattern given by
pMatch
is a grammatical error, this function returns a string "ERROR
".
My Solution:
Define a state machine and put regular expression strings into stacks. When used then pop up, multiple by N
times and push to stack again.
escape <-- escape <---- escape <-----
| | | | | |
| | | | | |
normal --> repeatStrStart --> repeatStrEnd --> repeatNumStart --
^ ^ | |
| | | |
| ------------ |
----------------------------------------------------
Code:
string PatternSearch(const unsigned char* pStr, const unsigned char* pMatch)
{
enum status {
normal,
escape,
repeatStrStart,
repeatStrEnd,
repeatNumStart
};
status current = normal;
status restore = normal;
const static string strErr = "ERROR";
stringstream bufNormal;
stringstream bufRepeat;
stringstream bufTmp;
stack<unsigned char> stackBracket;
stack<string> stackRepeat;
unsigned long repeat = 0;
size_t check = 0;
int i = 0;
for(i=0; pMatch[i] != '\0'; i++) {
if(current == normal) {
if(pMatch[i] == '\\') {
restore = current;
current = escape;
}
else if(pMatch[i] == '[') {
current = repeatStrStart;
stackBracket.push(pMatch[i]);
}
else {
bufNormal << pMatch[i];
}
}
else if(current == escape) {
if(restore == normal) {
bufNormal << pMatch[i];
current = restore;
}
else if(restore == repeatStrStart) {
bufRepeat << pMatch[i];
current = restore;
}
else if(restore == repeatNumStart) {
bufRepeat << pMatch[i];
current = restore;
}
else {
bufNormal.str(strErr);
bufNormal.clear();
break;
}
}
else if(current == repeatStrStart) {
if(pMatch[i] == '\\') {
restore = current;
current = escape;
}
else if(pMatch[i] == '[') {
stackBracket.push(pMatch[i]);
stackRepeat.push(bufRepeat.str());
bufRepeat.str("");
bufRepeat.clear();
}
else if(pMatch[i] == ']') {
if(stackBracket.empty() == false && stackBracket.top() == '[') {
current = repeatStrEnd;
if(stackBracket.size() > stackRepeat.size()) {
stackRepeat.push(bufRepeat.str());
}
else {
bufTmp.str("");
bufTmp.clear();
bufTmp << stackRepeat.top();
bufTmp << bufRepeat.str();
stackRepeat.pop();
stackRepeat.push(bufTmp.str());
bufTmp.str("");
bufTmp.clear();
}
stackBracket.pop();
bufRepeat.str("");
bufRepeat.clear();
}
else {
bufNormal.str(strErr);
bufNormal.clear();
break;
}
}
else {
bufRepeat << pMatch[i];
}
}
else if(current == repeatStrEnd) {
if(pMatch[i] == '{') {
stackBracket.push(pMatch[i]);
current = repeatNumStart;
}
else {
bufNormal.str(strErr);
bufNormal.clear();
break;
}
}
else {
if(pMatch[i] == '\\') {
restore = current;
current = escape;
}
else if(pMatch[i] == '}') {
if(stackBracket.empty() == false && stackBracket.top() == '{') {
stackBracket.pop();
try {
repeat = stoul(/*str=*/bufRepeat.str(), /*idx=*/&check);
if(check == bufRepeat.str().length()) {
bufRepeat.str("");
bufRepeat.clear();
while(repeat--) {
bufRepeat << stackRepeat.top();
}
stackRepeat.pop();
if(stackRepeat.empty()) {
bufNormal << bufRepeat.str();
}
else {
bufTmp.str("");
bufTmp.clear();
bufTmp << stackRepeat.top();
bufTmp << bufRepeat.str();
stackRepeat.pop();
stackRepeat.push(bufTmp.str());
}
bufTmp.str("");
bufTmp.clear();
bufRepeat.str("");
bufRepeat.clear();
}
else {
bufNormal.str(strErr);
bufNormal.clear();
break;
}
}
catch(invalid_argument e) {
bufNormal.str(strErr);
bufNormal.clear();
break;
}
catch(out_of_range e) {
bufNormal.str(strErr);
bufNormal.clear();
break;
}
if(stackBracket.empty()) {
current = normal;
}
else {
current = repeatStrStart;
}
}
else {
bufRepeat.str(strErr);
bufRepeat.clear();
break;
}
}
else {
bufRepeat << pMatch[i];
}
}
}
if(stackBracket.empty() == false || stackRepeat.empty() == false) {
bufNormal.str(strErr);
bufNormal.clear();
}
else if(current != normal) {
bufNormal.str(strErr);
bufNormal.clear();
}
else {
bufTmp.str("");
bufTmp.clear();
bufTmp << pStr;
if(bufTmp.str().find(bufNormal.str()) == string::npos) {
bufNormal.str("");
bufNormal.clear();
}
}
return bufNormal.str();
}
std::regex
? \$\endgroup\$