/******************************************************************************* * Copyright (c) 2000, 2020 IBM Corp. and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this * distribution and is available at http://eclipse.org/legal/epl-2.0 * or the Apache License, Version 2.0 which accompanies this distribution * and is available at https://www.apache.org/licenses/LICENSE-2.0. * * This Source Code may also be made available under the following Secondary * Licenses when the conditions for such availability set forth in the * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License, * version 2 with the GNU Classpath Exception [1] and GNU General Public * License, version 2 with the OpenJDK Assembly Exception [2]. * * [1] https://www.gnu.org/software/classpath/license.html * [2] http://openjdk.java.net/legal/assembly-exception.html * * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception *******************************************************************************/ #include "infra/SimpleRegex.hpp" #include <ctype.h> #include <stdio.h> #include <string.h> #include "env/FrontEnd.hpp" #include "compile/Compilation.hpp" #include "compile/ResolvedMethod.hpp" #include "env/StackMemoryRegion.hpp" #include "env/VerboseLog.hpp" #include "il/DataTypes.hpp" #include "ras/Debug.hpp" #include "ras/IgnoreLocale.hpp" /*** This is a mechanism to define a "filter." A "filter" is basically a (form of a) regular expression that allows one to specify a set of class names, method names, etc. <filter>: <simple_pattern> | <simple_pattern> "[:|]" <filter> <simple_pattern>: <component> | <component> <simple_pattern> <component>: <str> | <wildcards> | <alternate> for example, proc[d-g]:func*z will define a filter that matches procd, proce, procf and procg as well as any string that begins with func and ends with z. If the first simple_pattern of a filter is a ^, the truth value of the entire filter is inverted. ***/ namespace TR { SimpleRegex *SimpleRegex::create(char *&s) { if (s == NULL || s[0] != '{') return NULL; char *origStr = s; ++s; bool negate = (s[0] == '^'); if (negate) ++s; bool foundError = false; Regex *regex = processRegex(s, foundError); if (!regex || s[0] != '}' || foundError) return NULL; ++s; SimpleRegex *result = (SimpleRegex *)jitPersistentAlloc(sizeof(SimpleRegex)); result->_regex = regex; result->_negate = negate; result->_regexStr = origStr; result->_regexStrLen = s - origStr; return result; } SimpleRegex::Regex *SimpleRegex::processRegex(char *&s, bool &foundError) { // First get rid of all + and | // while (s[0] == ',' || s[0] == '|') ++s; if (s[0] == '}' || s[0] == '\0') return NULL; Regex *regex = (Regex *)jitPersistentAlloc(sizeof(Regex)); regex->simple = processSimple(s, TR_maybe, foundError); if (foundError) return NULL; regex->remainder = processRegex(s, foundError); if (foundError) return NULL; return regex; } void *SimpleRegex::Component::operator new (size_t size, PERSISTENT_NEW_DECLARE, size_t numChars) { if (numChars > sizeof(Data)) size += numChars - sizeof(Data); return jitPersistentAlloc(size); } // Process a simple_pattern: a sequence of components // SimpleRegex::Simple *SimpleRegex::processSimple(char *&s, TR_YesNoMaybe allowAlternates, bool &foundError) { int32_t i,lo,hi; char *startSimple = s; if (s[0] == '\0' || s[0] == ',' || s[0] == '|' || s[0] == '}') return NULL; Simple *m = (Simple *)jitPersistentAlloc(sizeof(Simple)); // Decide if we are going to allow alternatives - if there are no ']' chars // in the regex then '[' chars are treated as regular characters (since they // can occur in method signatures). // if (s[0] == '[' && allowAlternates == TR_maybe) { if (s[1] == '^') allowAlternates = TR_yes; else { allowAlternates = TR_no; for (i = 0; s[i] != '}' && s[i] != '\0'; i++) { if (s[i] == ']') { allowAlternates = TR_yes; break; } if (s[i] == '\\' && s[i+1] != '\0') i++; } } } if (s[0] == '[' && allowAlternates == TR_yes) { // process character alternatives // bool invert = false; m->component = new (PERSISTENT_NEW, 0) Component; m->component->type = char_alternatives; for (i = 0; i < sizeof(m->component->data.bit_map) / sizeof(m->component->data.bit_map[0]); m->component->data.bit_map[i++] = 0) {} ++s; if (s[0] == '^') { invert = true; allowAlternates = TR_yes; ++s; } while (s[0] != ']' && s[0] != '}' && s[0] != '\0') { if (s[0] == '\\' && s[1] != '\0') ++s; if (s[1] == '-' && s[2] != ']' && s[2] != '\0') { lo = s[0]; s += 2; if (s[0] == '\\' && s[1] != ']' && s[1] != '\0') ++s; } else { lo = s[0]; } hi = s[0]; ++s; if (lo > hi) { i = lo; lo = hi; hi = i; } for (i = lo; i <= hi; ++i) { m->component->data.bit_map[BWORD(i)] |= (CONSTANT64(1) << BBIT(i)); } } if (s[0] != ']') { s = startSimple; foundError = true; return NULL; } ++s; if (invert) { for (i = 0; i < sizeof(m->component->data.bit_map) / sizeof(m->component->data.bit_map[0]); i++) { m->component->data.bit_map[i] = ~m->component->data.bit_map[i]; } } } else if (s[0] == '?' || s[0] == '*') { // process wild card characters // m->component = new (PERSISTENT_NEW, 0) Component; m->component->type = wildcards; m->component->data.counts = 0; while (s[0] == '?' || s[0] == '*') { if (s[0] == '?') m->component->data.counts += 2; else m->component->data.counts |= 1; ++s; } } else { // process ordinary characters // i = 0; while (s[i] != '\0' && s[i] != '*' && s[i] != '?' && s[i] != ',' && s[i] != '|' && s[i] != '}') { if (s[i] == '[' && allowAlternates != TR_no) break; if (s[i] == '\\' && s[i+1] != '\0') ++i; ++i; } m->component = new (PERSISTENT_NEW, i+1) Component; m->component->type = simple_string; i = 0; while (s[0] != '\0' && s[0] != '*' && s[0] != '?' && s[0] != ',' && s[0] != '|' && s[0] != '}') { if (s[0] == '[' && allowAlternates != TR_no) break; if (s[0] == '\\' && s[1] != '\0') ++s; m->component->data.str[i++] = s[0]; ++s; } m->component->data.str[i++] = '\0'; } m->remainder = processSimple(s, allowAlternates, foundError); if (foundError) return NULL; if (m->remainder == NULL) m->fixed_chars_right = 0; else { if ((m->remainder->component->type == wildcards && (m->remainder->component->data.counts & 1) != 0) || (m->remainder->fixed_chars_right == 0 && m->remainder->remainder != NULL)) { m->fixed_chars_right = 0; } else { i = 0; switch (m->remainder->component->type) { case simple_string: i = (int32_t)strlen(m->remainder->component->data.str); break; case wildcards: i = m->remainder->component->data.counts >> 1; break; case char_alternatives: i = 1; break; default: /*** ???? ***/ break; } m->fixed_chars_right = m->remainder->fixed_chars_right + i; } } return m; } bool SimpleRegex::Simple::matchesRemainder( const char *s, bool isCaseSensitive, bool useLocale ) { if (remainder == NULL) return s[0] == '\0'; return remainder->match(s, isCaseSensitive, useLocale); } bool SimpleRegex::Simple::match(const char *s, bool isCaseSensitive, bool useLocale) { int i; switch (component->type) { case simple_string: for (i = 0; component->data.str[i]; ++i, ++s) { if (isCaseSensitive) { if (s[0] != component->data.str[i]) return false; } else { char lower_s0 = useLocale ? (tolower(s[0])) : (::tolower_ignore_locale(s[0])); char lower_stri = useLocale ? (tolower(component->data.str[i])) : (::tolower_ignore_locale(component->data.str[i])); bool notEqual = (lower_s0 != lower_stri); if (notEqual) return false; } } return matchesRemainder(s, isCaseSensitive, useLocale); break; case wildcards: if (strlen(s) < component->data.counts >> 1) return false; s += (component->data.counts >> 1); if ((component->data.counts & 1) == 0) { return matchesRemainder(s, isCaseSensitive, useLocale); } if (fixed_chars_right != 0 || remainder == NULL) { return (strlen(s) >= fixed_chars_right) && matchesRemainder(s + strlen(s) - fixed_chars_right, isCaseSensitive, useLocale); } // tricky case abc*def*ghi // do { if (matchesRemainder(s, isCaseSensitive, useLocale)) return true; } while (*++s); return false; break; case char_alternatives: { i = s[0]; bool charIsInBitMap = ((component->data.bit_map[BWORD(i)] & (CONSTANT64(1) << BBIT(i))) != 0); if (!isCaseSensitive && !charIsInBitMap) { char lower_i = useLocale ? (tolower(i)) : (::tolower_ignore_locale(i)); char upper_i = useLocale ? (toupper(i)) : (::toupper_ignore_locale(i)); charIsInBitMap = ((component->data.bit_map[BWORD(lower_i)] & (CONSTANT64(1) << BBIT(lower_i))) != 0) || ((component->data.bit_map[BWORD(upper_i)] & (CONSTANT64(1) << BBIT(upper_i))) != 0); } return charIsInBitMap && matchesRemainder(++s, isCaseSensitive, useLocale); break; } default: /*** ???? ***/ return false; break; } // this is just to quiet the compiler -- it's never reached // return false; } bool SimpleRegex::Regex::match(const char *s, bool isCaseSensitive, bool useLocale) { int32_t rc = false; for (Regex *p = this; p && !rc; p = p->remainder) { rc = p->simple->match(s, isCaseSensitive, useLocale); } return rc; } bool SimpleRegex::match(const char *s, bool isCaseSensitive, bool useLocale) { bool result = _regex->match(s, isCaseSensitive, useLocale); if (_negate) result = !result; return result; } void SimpleRegex::print(bool negate) { TR_VerboseLog::vlogAcquire(); TR_VerboseLog::write("{"); if (negate ^ _negate) TR_VerboseLog::write("^"); _regex->print(); TR_VerboseLog::write("}"); TR_VerboseLog::vlogRelease(); } void SimpleRegex::Regex::print() { if (simple) simple->print(); if (remainder) { TR_VerboseLog::vlogAcquire(); TR_VerboseLog::write("|"); TR_VerboseLog::vlogRelease(); remainder->print(); } } void SimpleRegex::Simple::print() { TR_VerboseLog::vlogAcquire(); int32_t i; switch (component->type) { case wildcards: for (i = 2; i <= component->data.counts; i += 2) TR_VerboseLog::write("?"); if (component->data.counts & 1) TR_VerboseLog::write("*"); break; case char_alternatives: { TR_VerboseLog::write("["); if ((component->data.bit_map[BWORD(0)] & (1 << BBIT(0))) != 0) { TR_VerboseLog::write("^"); for (i = 1; i < 256; i++) { if ((component->data.bit_map[BWORD(i)] & (CONSTANT64(1) << BBIT(i))) == 0) TR_VerboseLog::write("%c", i); } } else { for (i = 1; i < 256; i++) { if ((component->data.bit_map[BWORD(i)] & (CONSTANT64(1) << BBIT(i))) != 0) TR_VerboseLog::write("%c", i); } } TR_VerboseLog::write("]"); break; } case simple_string: TR_VerboseLog::write("%s", component->data.str); break; } if (remainder) remainder->print(); TR_VerboseLog::vlogRelease(); } bool SimpleRegex::match(TR::SimpleRegex *regex, const char *s, bool isCaseSensitive) { return regex->match(s, isCaseSensitive, true); } bool SimpleRegex::match(TR::SimpleRegex *regex, int32_t i, bool isCaseSensitive) { char s[20]; sprintf(s, "%d", i); return regex->match(s, isCaseSensitive, true); } bool SimpleRegex::match( TR::SimpleRegex *regex, TR_ResolvedMethod * feMethod, bool isCaseSensitive) { TR::Compilation *comp = TR::comp(); TR::StackMemoryRegion stackMemoryRegion(*comp->trMemory()); bool result = regex->match(feMethod->signature(comp->trMemory(), stackAlloc), isCaseSensitive, true); return result; } bool SimpleRegex::matchIgnoringLocale( TR::SimpleRegex *regex, const char *s, bool isCaseSensitive) { return regex->match(s, isCaseSensitive, false); } }