00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "regexp.h"
00023
00024 #include <stdio.h>
00025 #include <stdlib.h>
00026 #include <string.h>
00027
00028 using namespace KJS;
00029
00030 RegExp::RegExp(const UString &p, int f)
00031 : pattern(p), flgs(f), m_notEmpty(false)
00032 {
00033 #ifdef HAVE_PCREPOSIX
00034 int pcreflags = 0;
00035 const char *perrormsg;
00036 int errorOffset;
00037
00038 if (flgs & IgnoreCase)
00039 pcreflags |= PCRE_CASELESS;
00040
00041 if (flgs & Multiline)
00042 pcreflags |= PCRE_MULTILINE;
00043
00044 pcregex = pcre_compile(p.ascii(), pcreflags,
00045 &perrormsg, &errorOffset, NULL);
00046 #ifndef NDEBUG
00047 if (!pcregex)
00048 fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", perrormsg);
00049 #endif
00050
00051 #ifdef PCRE_INFO_CAPTURECOUNT
00052
00053 int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns);
00054 if (rc != 0)
00055 #endif
00056 nrSubPatterns = 0;
00057
00058 #else
00059
00060 nrSubPatterns = 0;
00061 int regflags = 0;
00062 #ifdef REG_EXTENDED
00063 regflags |= REG_EXTENDED;
00064 #endif
00065 #ifdef REG_ICASE
00066 if ( f & IgnoreCase )
00067 regflags |= REG_ICASE;
00068 #endif
00069
00070
00071
00072
00073
00074
00075 if (regcomp(&preg, p.ascii(), regflags) != 0) {
00076
00077 regcomp(&preg, "", regflags);
00078 }
00079 #endif
00080 }
00081
00082 RegExp::~RegExp()
00083 {
00084 #ifdef HAVE_PCREPOSIX
00085 if (pcregex)
00086 pcre_free(pcregex);
00087 #else
00088
00089 regfree(&preg);
00090 #endif
00091 }
00092
00093 UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
00094 {
00095 if (i < 0)
00096 i = 0;
00097 if (ovector)
00098 *ovector = 0L;
00099 int dummyPos;
00100 if (!pos)
00101 pos = &dummyPos;
00102 *pos = -1;
00103 if (i > s.size() || s.isNull())
00104 return UString::null;
00105
00106 #ifdef HAVE_PCREPOSIX
00107 CString buffer(s.cstring());
00108 int bufferSize = buffer.size();
00109 int ovecsize = (nrSubPatterns+1)*3;
00110 if (ovector) *ovector = new int[ovecsize];
00111 if (!pcregex)
00112 return UString::null;
00113
00114 if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i,
00115 m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED) : 0,
00116 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
00117 {
00118
00119 if ((flgs & Global) && m_notEmpty && ovector)
00120 {
00121
00122
00123
00124 fprintf(stderr, "No match after m_notEmpty. +1 and keep going.\n");
00125 m_notEmpty = 0;
00126 if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i+1, 0,
00127 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
00128 return UString::null;
00129 }
00130 else
00131 return UString::null;
00132 }
00133
00134
00135
00136 if (!ovector)
00137 return UString::null;
00138 #else
00139 const uint maxMatch = 10;
00140 regmatch_t rmatch[maxMatch];
00141
00142 char *str = strdup(s.ascii());
00143 if (regexec(&preg, str + i, maxMatch, rmatch, 0)) {
00144 free(str);
00145 return UString::null;
00146 }
00147 free(str);
00148
00149 if (!ovector) {
00150 *pos = rmatch[0].rm_so + i;
00151 return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so);
00152 }
00153
00154
00155 nrSubPatterns = 0;
00156 for(uint j = 1; j < maxMatch && rmatch[j].rm_so >= 0 ; j++)
00157 nrSubPatterns++;
00158 int ovecsize = (nrSubPatterns+1)*3;
00159 *ovector = new int[ovecsize];
00160 for (uint j = 0; j < nrSubPatterns + 1; j++) {
00161 if (j>maxMatch)
00162 break;
00163 (*ovector)[2*j] = rmatch[j].rm_so + i;
00164 (*ovector)[2*j+1] = rmatch[j].rm_eo + i;
00165 }
00166 #endif
00167
00168 *pos = (*ovector)[0];
00169 #ifdef HAVE_PCREPOSIX // TODO check this stuff in non-pcre mode
00170 if ( *pos == (*ovector)[1] && (flgs & Global) )
00171 {
00172
00173 m_notEmpty=true;
00174 }
00175 #endif
00176 return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
00177 }
00178
#if 0 // unused
/**
 * Reports whether the pattern matches anywhere in @p s.
 * The second (unnamed) parameter is ignored.
 */
bool RegExp::test(const UString &s, int)
{
#ifdef HAVE_PCREPOSIX
  int ovector[300];
  CString buffer(s.cstring());

  // A null string never matches.
  if (s.isNull())
    return false;

  const int rc = pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), 0,
                           0, ovector, 300);
  return rc != PCRE_ERROR_NOMATCH;

#else

  char *str = strdup(s.ascii());
  // No match offsets are requested; only the yes/no result matters.
  const bool matched = (regexec(&preg, str, 0, 0, 0) == 0);
  free(str);

  return matched;
#endif
}
#endif