如何实现模式搜索Boyer Moore算法？详细解析和实现

2021年3月28日13:41:28 发表评论 1,472 次浏览

本文概述

``````Input:  txt[] = "THIS IS A TEST TEXT"
pat[] = "TEST"
Output: Pattern found at index 10

pat[] =  "AABA"
Output: Pattern found at index 0
Pattern found at index 9
Pattern found at index 12``````

Boyer Moore算法是以下两种方法的组合。

1)坏字符启发式

2)良好的后缀启发式

1)不匹配变成匹配

2)模式P移过不匹配的字符。

C ++

``````/* C++ Program for Bad Character Heuristic of Boyer
Moore String Matching Algorithm */
#include <bits/stdc++.h>
using namespace std;
# define NO_OF_CHARS 256

// The preprocessing function for Boyer Moore's
{
int i;

// Initialize all occurrences as -1
for (i = 0; i < NO_OF_CHARS; i++)

// Fill the actual value of last occurrence
// of a character
for (i = 0; i < size; i++)
badchar[( int ) str[i]] = i;
}

/* A pattern searching function that uses Bad
Character Heuristic of Boyer Moore Algorithm */
void search( string txt, string pat)
{
int m = pat.size();
int n = txt.size();

/* Fill the bad character array by calling
for given pattern */

int s = 0; // s is shift of the pattern with
// respect to text
while (s <= (n - m))
{
int j = m - 1;

/* Keep reducing index j of pattern while
characters of pattern and text are
matching at this shift s */
while (j >= 0 && pat[j] == txt展开)
j--;

/* If the pattern is present at current
shift, then index j will become -1 after
the above loop */
if (j < 0)
{
cout << "pattern occurs at shift = " <<  s << endl;

/* Shift the pattern so that the next
character in text aligns with the last
occurrence of it in pattern.
The condition s+m < n is necessary for
the case when pattern occurs at the end
of text */
s += (s + m < n)? m-badchar[txt展开] : 1;

}

else
/* Shift the pattern so that the bad character
in text aligns with the last occurrence of
it in pattern. The max function is used to
make sure that we get a positive shift.
We may get a negative shift if the last
occurrence of bad character in pattern
is on the right side of the current
character. */
s += max(1, j - badchar[txt展开]);
}
}

/* Driver code */
int main()
{
string txt= "ABAAABCD" ;
string pat = "ABC" ;
search(txt, pat);
return 0;
}

// This code is contributed by rathbhupendra``````

C

``````/* C Program for Bad Character Heuristic of Boyer
Moore String Matching Algorithm */
# include <limits.h>
# include <string.h>
# include <stdio.h>

# define NO_OF_CHARS 256

// A utility function to get maximum of two integers
int max ( int a, int b) { return (a > b)? a: b; }

// The preprocessing function for Boyer Moore's
{
int i;

// Initialize all occurrences as -1
for (i = 0; i < NO_OF_CHARS; i++)

// Fill the actual value of last occurrence
// of a character
for (i = 0; i < size; i++)
badchar[( int ) str[i]] = i;
}

/* A pattern searching function that uses Bad
Character Heuristic of Boyer Moore Algorithm */
void search( char *txt, char *pat)
{
int m = strlen (pat);
int n = strlen (txt);

/* Fill the bad character array by calling
for given pattern */

int s = 0;  // s is shift of the pattern with
// respect to text
while (s <= (n - m))
{
int j = m-1;

/* Keep reducing index j of pattern while
characters of pattern and text are
matching at this shift s */
while (j >= 0 && pat[j] == txt展开)
j--;

/* If the pattern is present at current
shift, then index j will become -1 after
the above loop */
if (j < 0)
{
printf ( "\n pattern occurs at shift = %d" , s);

/* Shift the pattern so that the next
character in text aligns with the last
occurrence of it in pattern.
The condition s+m < n is necessary for
the case when pattern occurs at the end
of text */
s += (s+m < n)? m-badchar[txt展开] : 1;

}

else
/* Shift the pattern so that the bad character
in text aligns with the last occurrence of
it in pattern. The max function is used to
make sure that we get a positive shift.
We may get a negative shift if the last
occurrence  of bad character in pattern
is on the right side of the current
character. */
s += max(1, j - badchar[txt展开]);
}
}

/* Driver program to test above function */
int main()
{
char txt[] = "ABAAABCD" ;
char pat[] = "ABC" ;
search(txt, pat);
return 0;
}``````

Java

``````/* Java Program for Bad Character Heuristic of Boyer
Moore String Matching Algorithm */

class AWQ{

static int NO_OF_CHARS = 256 ;

//A utility function to get maximum of two integers
static int max ( int a, int b) { return (a > b)? a: b; }

//The preprocessing function for Boyer Moore's
{
int i;

// Initialize all occurrences as -1
for (i = 0 ; i < NO_OF_CHARS; i++)

// Fill the actual value of last occurrence
// of a character
for (i = 0 ; i < size; i++)
badchar[( int ) str[i]] = i;
}

/* A pattern searching function that uses Bad
Character Heuristic of Boyer Moore Algorithm */
static void search( char txt[], char pat[])
{
int m = pat.length;
int n = txt.length;

int badchar[] = new int [NO_OF_CHARS];

/* Fill the bad character array by calling
for given pattern */

int s = 0 ;  // s is shift of the pattern with
// respect to text
while (s <= (n - m))
{
int j = m- 1 ;

/* Keep reducing index j of pattern while
characters of pattern and text are
matching at this shift s */
while (j >= 0 && pat[j] == txt展开)
j--;

/* If the pattern is present at current
shift, then index j will become -1 after
the above loop */
if (j < 0 )
{
System.out.println( "Patterns occur at shift = " + s);

/* Shift the pattern so that the next
character in text aligns with the last
occurrence of it in pattern.
The condition s+m < n is necessary for
the case when pattern occurs at the end
of text */
s += (s+m < n)? m-badchar[txt展开] : 1 ;

}

else
/* Shift the pattern so that the bad character
in text aligns with the last occurrence of
it in pattern. The max function is used to
make sure that we get a positive shift.
We may get a negative shift if the last
occurrence  of bad character in pattern
is on the right side of the current
character. */
s += max( 1 , j - badchar[txt展开]);
}
}

/* Driver program to test above function */
public static void main(String []args) {

char txt[] = "ABAAABCD" .toCharArray();
char pat[] = "ABC" .toCharArray();
search(txt, pat);
}
}``````

python

``````# Python3 Program for Bad Character Heuristic
# of Boyer Moore String Matching Algorithm

NO_OF_CHARS = 256

'''
The preprocessing function for
'''

# Initialize all occurrence as -1
badChar = [ - 1 ] * NO_OF_CHARS

# Fill the actual value of last occurrence
for i in range (size):

# retun initialized list

def search(txt, pat):
'''
A pattern searching function that uses Bad Character
Heuristic of Boyer Moore Algorithm
'''
m = len (pat)
n = len (txt)

# create the bad character list by calling
# for given pattern

# s is shift of the pattern with respect to text
s = 0
while (s < = n - m):
j = m - 1

# Keep reducing index j of pattern while
# characters of pattern and text are matching
# at this shift s
while j> = 0 and pat[j] = = txt展开:
j - = 1

# If the pattern is present at current shift, # then index j will become -1 after the above loop
if j< 0 :
print ( "Pattern occur at shift = {}" . format (s))

'''
Shift the pattern so that the next character in text
aligns with the last occurrence of it in pattern.
The condition s+m < n is necessary for the case when
pattern occurs at the end of text
'''
s + = (m - badChar[ ord (txt展开)] if s + m<n else 1 )
else :
'''
Shift the pattern so that the bad character in text
aligns with the last occurrence of it in pattern. The
max function is used to make sure that we get a positive
shift. We may get a negative shift if the last occurrence
of bad character in pattern is on the right side of the
current character.
'''
s + = max ( 1 , j - badChar[ ord (txt展开)])

# Driver program to test above function
def main():
txt = "ABAAABCD"
pat = "ABC"
search(txt, pat)

if __name__ = = '__main__' :
main()

# This code is contributed by Atul Kumar

C#

``````/* C# Program for Bad Character Heuristic of Boyer
Moore String Matching Algorithm */

using System;
public class AWQ{

static int NO_OF_CHARS = 256;

//A utility function to get maximum of two integers
static int max ( int a, int b) { return (a > b)? a: b; }

//The preprocessing function for Boyer Moore's
{
int i;

// Initialize all occurrences as -1
for (i = 0; i < NO_OF_CHARS; i++)

// Fill the actual value of last occurrence
// of a character
for (i = 0; i < size; i++)
badchar[( int ) str[i]] = i;
}

/* A pattern searching function that uses Bad
Character Heuristic of Boyer Moore Algorithm */
static void search( char []txt, char []pat)
{
int m = pat.Length;
int n = txt.Length;

int []badchar = new int [NO_OF_CHARS];

/* Fill the bad character array by calling
for given pattern */

int s = 0; // s is shift of the pattern with
// respect to text
while (s <= (n - m))
{
int j = m-1;

/* Keep reducing index j of pattern while
characters of pattern and text are
matching at this shift s */
while (j >= 0 && pat[j] == txt展开)
j--;

/* If the pattern is present at current
shift, then index j will become -1 after
the above loop */
if (j < 0)
{
Console.WriteLine( "Patterns occur at shift = " + s);

/* Shift the pattern so that the next
character in text aligns with the last
occurrence of it in pattern.
The condition s+m < n is necessary for
the case when pattern occurs at the end
of text */
s += (s+m < n)? m-badchar[txt展开] : 1;

}

else
/* Shift the pattern so that the bad character
in text aligns with the last occurrence of
it in pattern. The max function is used to
make sure that we get a positive shift.
We may get a negative shift if the last
occurrence of bad character in pattern
is on the right side of the current
character. */
s += max(1, j - badchar[txt展开]);
}
}

/* Driver program to test above function */
public static void Main() {

char []txt = "ABAAABCD" .ToCharArray();
char []pat = "ABC" .ToCharArray();
search(txt, pat);
}
}

// This code is contributed by PrinciRaj19992``````

``pattern occurs at shift = 4``

Boyer Moore算法|良好的后缀启发式