diff options
Diffstat (limited to 'src/regex.c')
-rw-r--r-- | src/regex.c | 391 |
1 files changed, 0 insertions, 391 deletions
diff --git a/src/regex.c b/src/regex.c deleted file mode 100644 index 32fe6ab..0000000 --- a/src/regex.c +++ /dev/null @@ -1,391 +0,0 @@ -#include <assert.h> -#include <stdint.h> -#include <stdio.h> -#include <string.h> - -#include "regex.h" -#include "label_file.h" - -int regex_prepare_data(struct regex_data ** regex, char const * pattern_string, - struct regex_error_data * errordata) { - memset(errordata, 0, sizeof(struct regex_error_data)); - *regex = regex_data_create(); - if (!(*regex)) - return -1; -#ifdef USE_PCRE2 - (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string, - PCRE2_ZERO_TERMINATED, - PCRE2_DOTALL, - &errordata->error_code, - &errordata->error_offset, NULL); -#else - (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL, - &errordata->error_buffer, - &errordata->error_offset, NULL); -#endif - if (!(*regex)->regex) { - goto err; - } - -#ifdef USE_PCRE2 - (*regex)->match_data = - pcre2_match_data_create_from_pattern((*regex)->regex, NULL); - if (!(*regex)->match_data) { - goto err; - } -#else - (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer); - if (!(*regex)->sd && errordata->error_buffer) { - goto err; - } - (*regex)->extra_owned = !!(*regex)->sd; -#endif - return 0; - -err: regex_data_free(*regex); - *regex = NULL; - return -1; -} - -char const * regex_version() { -#ifdef USE_PCRE2 - static int initialized = 0; - static char * version_string = NULL; - size_t version_string_len; - if (!initialized) { - version_string_len = pcre2_config(PCRE2_CONFIG_VERSION, NULL); - version_string = (char*) malloc(version_string_len); - if (!version_string) { - return NULL; - } - pcre2_config(PCRE2_CONFIG_VERSION, version_string); - initialized = 1; - } - return version_string; -#else - return pcre_version(); -#endif -} - -int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) { - int rc; - size_t entry_len, info_len; - - rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); -#ifdef USE_PCRE2 - if (rc < 0) - return -1; - -#ifndef NO_PERSISTENTLY_STORED_PATTERNS - /* this should yield exactly one because we store one pattern at a time - */ - rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr); - if (rc != 1) - return -1; - - *regex = regex_data_create(); - if (!*regex) - return -1; - - rc = pcre2_serialize_decode(&(*regex)->regex, 1, - (PCRE2_SPTR)mmap_area->next_addr, NULL); - if (rc != 1) - goto err; - - (*regex)->match_data = - pcre2_match_data_create_from_pattern((*regex)->regex, NULL); - if (!(*regex)->match_data) - goto err; - -#endif /* NO_PERSISTENTLY_STORED_PATTERNS */ - /* and skip the decoded bit */ - rc = next_entry(NULL, mmap_area, entry_len); - if (rc < 0) - goto err; - - return 0; -#else - if (rc < 0 || !entry_len) { - rc = -1; - return -1; - } - *regex = regex_data_create(); - if (!(*regex)) - return -1; - - (*regex)->regex = (pcre *) mmap_area->next_addr; - rc = next_entry(NULL, mmap_area, entry_len); - if (rc < 0) - goto err; - - /* Check that regex lengths match. pcre_fullinfo() - * also validates its magic number. */ - rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len); - if (rc < 0 || info_len != entry_len) { - goto err; - } - - rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); - if (rc < 0 || !entry_len) { - goto err; - } - (*regex)->lsd.study_data = (void *) mmap_area->next_addr; - (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA; - rc = next_entry(NULL, mmap_area, entry_len); - if (rc < 0) - goto err; - - /* Check that study data lengths match. */ - rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd, - PCRE_INFO_STUDYSIZE, - &info_len); - if (rc < 0 || info_len != entry_len) { - goto err; - } - (*regex)->extra_owned = 0; - return 0; -#endif - err: regex_data_free(*regex); - *regex = NULL; - return -1; -} - -int regex_writef(struct regex_data * regex, FILE * fp) { - int rc; - size_t len; -#ifdef USE_PCRE2 - PCRE2_UCHAR * bytes; - PCRE2_SIZE to_write; - -#ifndef NO_PERSISTENTLY_STORED_PATTERNS - /* encode the patter for serialization */ - rc = pcre2_serialize_encode(®ex->regex, 1, &bytes, &to_write, NULL); - if (rc != 1) - return -1; - -#else - (void)regex; // silence unused parameter warning - to_write = 0; -#endif - /* write serialized pattern's size */ - len = fwrite(&to_write, sizeof(uint32_t), 1, fp); - if (len != 1) { -#ifndef NO_PERSISTENTLY_STORED_PATTERNS - pcre2_serialize_free(bytes); -#endif - return -1; - } - -#ifndef NO_PERSISTENTLY_STORED_PATTERNS - /* write serialized pattern */ - len = fwrite(bytes, 1, to_write, fp); - if (len != to_write) { - pcre2_serialize_free(bytes); - return -1; - } - pcre2_serialize_free(bytes); -#endif -#else - uint32_t to_write; - size_t size; - pcre_extra * sd = regex->extra_owned ? regex->sd : ®ex->lsd; - - /* determine the size of the pcre data in bytes */ - rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size); - if (rc < 0) - return -1; - - /* write the number of bytes in the pcre data */ - to_write = size; - len = fwrite(&to_write, sizeof(uint32_t), 1, fp); - if (len != 1) - return -1; - - /* write the actual pcre data as a char array */ - len = fwrite(regex->regex, 1, to_write, fp); - if (len != to_write) - return -1; - - /* determine the size of the pcre study info */ - rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size); - if (rc < 0) - return -1; - - /* write the number of bytes in the pcre study data */ - to_write = size; - len = fwrite(&to_write, sizeof(uint32_t), 1, fp); - if (len != 1) - return -1; - - /* write the actual pcre study data as a char array */ - len = fwrite(sd->study_data, 1, to_write, fp); - if (len != to_write) - return -1; -#endif - return 0; -} - -struct regex_data * regex_data_create() { - struct regex_data * dummy = (struct regex_data*) malloc( - sizeof(struct regex_data)); - if (dummy) { - memset(dummy, 0, sizeof(struct regex_data)); - } - return dummy; -} - -void regex_data_free(struct regex_data * regex) { - if (regex) { -#ifdef USE_PCRE2 - if (regex->regex) { - pcre2_code_free(regex->regex); - } - if (regex->match_data) { - pcre2_match_data_free(regex->match_data); - } -#else - if (regex->regex) - pcre_free(regex->regex); - if (regex->extra_owned && regex->sd) { - pcre_free_study(regex->sd); - } -#endif - free(regex); - } -} - -int regex_match(struct regex_data * regex, char const * subject, int partial) { - int rc; -#ifdef USE_PCRE2 - rc = pcre2_match(regex->regex, - (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0, - partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, - NULL); - if (rc > 0) - return REGEX_MATCH; - switch (rc) { - case PCRE2_ERROR_PARTIAL: - return REGEX_MATCH_PARTIAL; - case PCRE2_ERROR_NOMATCH: - return REGEX_NO_MATCH; - default: - return REGEX_ERROR; - } -#else - rc = pcre_exec(regex->regex, - regex->extra_owned ? regex->sd : ®ex->lsd, subject, - strlen(subject), 0, partial ? PCRE_PARTIAL_SOFT : 0, - NULL, - 0); - switch (rc) { - case 0: - return REGEX_MATCH; - case PCRE_ERROR_PARTIAL: - return REGEX_MATCH_PARTIAL; - case PCRE_ERROR_NOMATCH: - return REGEX_NO_MATCH; - default: - return REGEX_ERROR; - } -#endif -} - -/* TODO Replace this compare function with something that actually compares the - * regular expressions. - * This compare function basically just compares the binary representations of - * the automatons, and because this representation contains pointers and - * metadata, it can only return a match if regex1 == regex2. - * Preferably, this function would be replaced with an algorithm that computes - * the equivalence of the automatons systematically. - */ -int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) { - int rc; - size_t len1, len2; -#ifdef USE_PCRE2 - rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1); - assert(rc == 0); - rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2); - assert(rc == 0); - if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) - return SELABEL_INCOMPARABLE; -#else - rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1); - assert(rc == 0); - rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2); - assert(rc == 0); - if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) - return SELABEL_INCOMPARABLE; -#endif - return SELABEL_EQUAL; -} - -void regex_format_error(struct regex_error_data const * error_data, - char * buffer, size_t buf_size) { - unsigned the_end_length = buf_size > 4 ? 4 : buf_size; - char * ptr = &buffer[buf_size - the_end_length]; - int rc = 0; - size_t pos = 0; - if (!buffer || !buf_size) - return; - rc = snprintf(buffer, buf_size, "REGEX back-end error: "); - if (rc < 0) - /* If snprintf fails it constitutes a logical error that needs - * fixing. - */ - abort(); - - pos += rc; - if (pos >= buf_size) - goto truncated; - - if (error_data->error_offset > 0) { -#ifdef USE_PCRE2 - rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ", - error_data->error_offset); -#else - rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ", - error_data->error_offset); -#endif - if (rc < 0) - abort(); - - } - pos += rc; - if (pos >= buf_size) - goto truncated; - -#ifdef USE_PCRE2 - rc = pcre2_get_error_message(error_data->error_code, - (PCRE2_UCHAR*)(buffer + pos), - buf_size - pos); - if (rc == PCRE2_ERROR_NOMEMORY) - goto truncated; -#else - rc = snprintf(buffer + pos, buf_size - pos, "%s", - error_data->error_buffer); - if (rc < 0) - abort(); - - if ((size_t)rc < strlen(error_data->error_buffer)) - goto truncated; -#endif - - return; - -truncated: - /* replace end of string with "..." to indicate that it was truncated */ - switch (the_end_length) { - /* no break statements, fall-through is intended */ - case 4: - *ptr++ = '.'; - case 3: - *ptr++ = '.'; - case 2: - *ptr++ = '.'; - case 1: - *ptr++ = '\0'; - default: - break; - } - return; -} |