Compute string distance
#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <stdlib.h>
#include <unistd.h>
#define VERSION "0.0.1"
#define PACKAGE "levenshtein"
void print_help(int exval);
int levenshtein(char *stra, char *strb);
int main(int argc, char *argv[]) {
char *src_str = NULL;
char *trgt_str = NULL;
int opt = 0;
int output_flag_opt = 0;
if(argc == 1)
print_help(0);
while((opt = getopt(argc, argv, "hve")) != -1) {
switch(opt) {
case 'h':
print_help(0);
case 'v':
return 0;
case 'e':
output_flag_opt = 1;
break;
case '?':
fprintf(stderr, "%s: Error - No such option `%c'\n", PACKAGE, optopt);
print_help(1);
} /* switch */
} /* while */
if((argc - optind) != 2) {
fprintf(stderr, "%s: Error - Need `SOURCE' and `TARGET' string\n\n", PACKAGE);
print_help(1);
}
if(strlen(argv[optind]) < 2 || strlen(argv[optind]) > 255) {
fprintf(stderr, "%s: Error - strlen(SOURCE) is either `< 2' or `> 255'\n", PACKAGE);
return 1;
} else
src_str = strdup(argv[optind]);
optind++;
if(strlen(argv[optind]) < 2 || strlen(argv[optind]) > 255) {
fprintf(stderr, "%s: Error - strlen(TARGET) is either `< 2' or `> 255'\n", PACKAGE);
return 1;
} else
trgt_str = strdup(argv[optind]);
if(output_flag_opt == 0)
printf("%d\n", levenshtein(src_str, trgt_str));
else {
printf("Source : %s\n", src_str);
printf("Target : %s\n", trgt_str);
printf("Lev.dist : %d\n", levenshtein(src_str, trgt_str));
}
return 0;
}
int levenshtein(char *stra, char *strb) {
int i, j, k, l, m1, m2, m3, laa, r;
int lengtha, lengthb, lengthmin;
int **d;
char *a;
lengtha = strlen(stra);
lengthb = strlen(strb);
lengthmin = lengtha;
if(lengthb > lengthmin) lengthmin = lengthb;
a = (char*)malloc((lengthmin+2)*sizeof(char));
d = (int**)malloc((lengthmin+2)*sizeof(int*));
for(i = 0; i < lengthmin + 2; i++) d[i] = (int*)malloc((lengthmin+2)*sizeof(int));
for(i = 0; i <= lengtha; i++) a[i] = stra[i];
laa = lengtha;
d[0][0] = 0;
for(i = 1; i <= lengtha; i++) d[i][0] = d[i-1][0]+1;
for(j = 1; j <= lengthb; j++) d[0][j] = d[0][j-1]+1;
for(i = 1; i <= laa; i++) {
for(j = 1;j <= lengthb; j++) {
r = 0;
if(a[i-1] != strb[j-1]) {
r = 1;
a[i-1] = strb[j-1];
}
m1 = d[i-1][j-1]+r;
for(k = lengtha; k >= i-1; k--) a[k+1] = a[k];
a[j-1] = strb[j-1];
lengtha++;
m2 = d[i][j-1]+1;
for(k = i-1; k < lengtha; k++) a[k] = a[k+1];
lengtha--;
m3 = d[i-1][j]+1;
r = m1;
if( m2 < r) r = m2;
if(m3 < r) r = m3;
d[i][j] = r;
lengtha = laa;
for(l = 0; l <= lengtha; l++) a[l] = stra[l];
}
}
r = d[lengtha][lengthb];
free(a);
for(i = 0; i < lengthmin+2; i++) free(d[i]);
free(d);
return(r);
}
void print_help(int exval) {
printf("%s,%s is a measure of the similarity inbetween two strings\n",
PACKAGE, VERSION);
printf("Usage: %s [-h] [-v] [-e] SOURCE TARGET\n\n", PACKAGE);
printf(" -h print this help and exit\n");
printf(" -v print version and exit\n");
printf(" -e print the source, target string and levenshtein distance\n\n");
exit(exval);
}
#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <stdlib.h>
#include <unistd.h>
#define VERSION "0.0.1"
#define PACKAGE "levenshtein"
void print_help(int exval);
int levenshtein(char *stra, char *strb);
int main(int argc, char *argv[]) {
char *src_str = NULL;
char *trgt_str = NULL;
int opt = 0;
int output_flag_opt = 0;
if(argc == 1)
print_help(0);
while((opt = getopt(argc, argv, "hve")) != -1) {
switch(opt) {
case 'h':
print_help(0);
case 'v':
return 0;
case 'e':
output_flag_opt = 1;
break;
case '?':
fprintf(stderr, "%s: Error - No such option `%c'\n", PACKAGE, optopt);
print_help(1);
} /* switch */
} /* while */
if((argc - optind) != 2) {
fprintf(stderr, "%s: Error - Need `SOURCE' and `TARGET' string\n\n", PACKAGE);
print_help(1);
}
if(strlen(argv[optind]) < 2 || strlen(argv[optind]) > 255) {
fprintf(stderr, "%s: Error - strlen(SOURCE) is either `< 2' or `> 255'\n", PACKAGE);
return 1;
} else
src_str = strdup(argv[optind]);
optind++;
if(strlen(argv[optind]) < 2 || strlen(argv[optind]) > 255) {
fprintf(stderr, "%s: Error - strlen(TARGET) is either `< 2' or `> 255'\n", PACKAGE);
return 1;
} else
trgt_str = strdup(argv[optind]);
if(output_flag_opt == 0)
printf("%d\n", levenshtein(src_str, trgt_str));
else {
printf("Source : %s\n", src_str);
printf("Target : %s\n", trgt_str);
printf("Lev.dist : %d\n", levenshtein(src_str, trgt_str));
}
return 0;
}
int levenshtein(char *stra, char *strb) {
int i, j, k, l, m1, m2, m3, laa, r;
int lengtha, lengthb, lengthmin;
int **d;
char *a;
lengtha = strlen(stra);
lengthb = strlen(strb);
lengthmin = lengtha;
if(lengthb > lengthmin) lengthmin = lengthb;
a = (char*)malloc((lengthmin+2)*sizeof(char));
d = (int**)malloc((lengthmin+2)*sizeof(int*));
for(i = 0; i < lengthmin + 2; i++) d[i] = (int*)malloc((lengthmin+2)*sizeof(int));
for(i = 0; i <= lengtha; i++) a[i] = stra[i];
laa = lengtha;
d[0][0] = 0;
for(i = 1; i <= lengtha; i++) d[i][0] = d[i-1][0]+1;
for(j = 1; j <= lengthb; j++) d[0][j] = d[0][j-1]+1;
for(i = 1; i <= laa; i++) {
for(j = 1;j <= lengthb; j++) {
r = 0;
if(a[i-1] != strb[j-1]) {
r = 1;
a[i-1] = strb[j-1];
}
m1 = d[i-1][j-1]+r;
for(k = lengtha; k >= i-1; k--) a[k+1] = a[k];
a[j-1] = strb[j-1];
lengtha++;
m2 = d[i][j-1]+1;
for(k = i-1; k < lengtha; k++) a[k] = a[k+1];
lengtha--;
m3 = d[i-1][j]+1;
r = m1;
if( m2 < r) r = m2;
if(m3 < r) r = m3;
d[i][j] = r;
lengtha = laa;
for(l = 0; l <= lengtha; l++) a[l] = stra[l];
}
}
r = d[lengtha][lengthb];
free(a);
for(i = 0; i < lengthmin+2; i++) free(d[i]);
free(d);
return(r);
}
void print_help(int exval) {
printf("%s,%s is a measure of the similarity inbetween two strings\n",
PACKAGE, VERSION);
printf("Usage: %s [-h] [-v] [-e] SOURCE TARGET\n\n", PACKAGE);
printf(" -h print this help and exit\n");
printf(" -v print version and exit\n");
printf(" -e print the source, target string and levenshtein distance\n\n");
exit(exval);
}
No comments:
Post a Comment