Tuesday, September 10, 2013

A Naive Implementation of Rabin Karp


Compile with gcc filename.c -lm -Werror -Wall

/*
 *
 * Program to check if a string is a substring of another
 * string. We will use Rolling Hash for this. Rabin Algorithm
 *

 */

#include
#include
#include

/*
 * The PRIME has to be big but for debugging I have taken a smaller value
 */
#define PRIME 5

/*
 * Function which will find the initial hash
 */
long hash(char *str, int window, int prime) {
        unsigned long hash=0;

        int i, temp;
        for(i=0; i                temp=str[i];
                temp=temp*pow(prime, window-1-i);
                hash=hash+temp;
        }
        return hash;
}

/*
 * It finds the rolling has by removing the first character and
 * adding one more character
 */

long rehash(char *str, int window, int prime, long oldhash) {
        char first=str[0];
        char last=str[window];
        long temp=first*pow(prime,window-1);
        long hash=oldhash-temp;
        hash=hash*prime;
        hash=hash+last;
        return hash;
}


/*
 * substr function works hard to find if the string is a
 * substring or not.
 */

char *substr(char *haystack, char *needle) {
        int i=0;
        long temphash;
        int window=strlen(needle);

        /*
         * Find the hash of the needle
         */
        long needleHash=hash(needle, window, PRIME);

#ifdef DEBUG
        /*
         * This part of the code just prints the different hash for the
         * particular window
         */
        printf("\nHash of the needle is %ld", needleHash);
        for (i=0; i                printf("\nRolling hash of %.*s is %ld", window, haystack+i, temphash=hash(haystack+i, window, PRIME ));
        }
#endif
        /*
         * Find the hash of the first window
         */
        temphash=hash(haystack, window, PRIME);

        /*
         * Find the hashes for the next windows
         */
        for (i=1; i                temphash=rehash(haystack+i-1, window, PRIME, temphash);

#ifdef DEBUG
                printf("\nRolling hash Actual of %.*s is %ld", window, haystack+i, temphash);
#endif
                /*
                 * You have the hash of the next window, check
                 * against the needle hash
                 */
                if (temphash==needleHash) {
                        printf("\nHere is the start of the string \"%s\"", haystack+i);
                        /*
                         * Here if you wish you can write code to check it
                         * byte by byte to rule out chances of collison
                         */
                        return haystack+i;
                        return 0;
                }
        }
        return NULL;
}

/*
 * main
 */
int main() {
        char haystack[]="I Love My Country Very Much";
        char needle[]="Much";
        char *str=substr(haystack, needle);
        if (str) {
                printf("\nThe substr is \"%s\"", str);
        } else {
                printf("\nCould not find the substring");
        }
       
        str=substr("HelloWorldIsTheTestString", "Hey");
        if (str) {
                printf("\nThe substr is \"%s\"", str);
        } else {
                printf("\nCould not find the substring");
        }
        return 0;
}