KMP pattern matching


/***
Given a text txt[0..n-1] and a pattern pat[0..m-1], write a function search(char pat[], char txt[]) that prints all occurrences of pat[] in txt[]. You may assume that n > m.
Examples:
1) Input:
  txt[] =  "THIS IS A TEST TEXT"
  pat[] = "TEST"
Output:
   Pattern found at index 10

2) Input:

  txt[] =  "AABAACAADAABAAABAA"
  pat[] = "AABA"
Output:

   Pattern found at index 0
   Pattern found at index 9
   Pattern found at index 13
***/

#include<iostream>
#include<vector>
#include<string>

using namespace std;

class KMPmatching{
public:
       KMPmatching(string, string);
       vector<int> matching();
       ~ KMPmatching();
private:
       string str;
       string pat;
       int sz, psz;
       int *pmt; // patial match table
       void preprocess();
       bool submatch(int, int);
};

KMPmatching :: KMPmatching(string str, string pat) {
       this->str = str;
       this->pat = pat;
       this->sz = str.size();
       this->psz = pat.size();

       pmt = new int[psz];
       memset(pmt, -1, sizeof(pmt));

       preprocess();
}

KMPmatching :: ~KMPmatching() {
       delete[] pmt;
}

bool KMPmatching :: submatch (int len, int suf) {
       int i = 0;
       while(len) {
              if(pat[i++] != pat[suf-len+1]) return false;
              len--;
       }
       return true;
}

void KMPmatching :: preprocess() {
       int len;
       int id = 0;

       while(id < psz) {
              int k = 1;
              len = 0;
              while(k <= id) {
                     if(!submatch (k, id)) break;
                     len = k;
                     k++;
              }
              pmt[id] = len;
              id++;
       }
}

vector<int> KMPmatching :: matching() {
       vector<int> matchid;
/*     cout<<"pmt: ";
       for (int i = 0; i<psz; i++) cout<<pmt[i]<<" ";
       cout<<endl;*/

       int i = 0;

       while(i <= sz-psz) {
      
              for(int j = 0; j<psz; j++) {
                     if(str[i+j] != pat[j]) {
                           if (j != 0) i += j-pmt[j-1]-1;   
                           break;
                     }
                     if(j == psz-1) matchid.push_back(i);
              }
              i++;
       }
       return matchid;
}

void main() {
       string str = "AABAACAADAABAAABAA";
       string pat = "AABA";
       vector<int> index;

       KMPmatching match(str, pat);
       index = match.matching();

       for (int i = 0; i<index.size(); i++)
              cout<<index[i]<<" ";
       cout<<endl;
}


Comments

  1. if loop nested, separate into different function.
    more concise and more clear.
    Pay attention on edge condition, especially the subscript is i-1. Remember to deal with the situation when i = 0.

    ReplyDelete

Post a Comment

Popular posts from this blog

Sudoku

Longest prefix matching - trie

Climbing Stairs