Create distinct wordlist sorted on frequency
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAXLINE 1024
#define MAXTOKENS 256
#define MINLEN 3
struct tnode {
char *word;
int count;
struct tnode *left, *right;
};
char **split(char *, char *);
struct tnode *addtree(struct tnode *, char *);
struct tnode *talloc(void);
void treeprint(struct tnode *);
void treefree(struct tnode *);
void arrayprint(struct tnode **, int);
int compare(const void *, const void *);
int tnodecount(struct tnode *);
int treetoarray(struct tnode *, struct tnode **);
int main(void) {
char *delim = ".,:;`'\"+-_(){}[]<>*&^%$#@!?~/|\\= \t\r\n1234567890";
char **tokens = NULL;
char line[MAXLINE];
struct tnode *root = {0};
struct tnode **tarray = {0};
int i, len, lcount, ncount;
i = len = lcount = ncount = 0;
while(fgets(line, MAXLINE, stdin) != NULL) {
len = strlen(line);
if(len < MINLEN)
continue;
else
lcount++;
if(line[len - 1] == '\n')
line[--len] = '\0';
tokens = split(line, delim);
for(i = 0; tokens[i] != NULL; i++)
root = addtree(root, tokens[i]);
for(i = 0; tokens[i] != NULL; i++)
free(tokens[i]);
free(tokens);
}
ncount = tnodecount(root);
if(ncount == 0)
return 1;
/* treeprint(root); */
tarray = malloc(ncount * sizeof(struct tnode *));
if(tarray == NULL)
return 1;
if(treetoarray(root, tarray) == -1)
return 1;
qsort(tarray, ncount, sizeof(struct tnode *), compare);
arrayprint(tarray, ncount);
treefree(root);
return 0;
}
struct tnode *addtree(struct tnode *p, char *w) {
int cond;
if(p == NULL) {
p = talloc();
p->word = strdup(w);
p->count = 1;
p->left = p->right = NULL;
} else if((cond = strcmp(w, p->word)) == 0)
p->count++;
else if(cond < 0)
p->left = addtree(p->left, w);
else
p->right = addtree(p->right, w);
return p;
}
struct tnode *talloc(void) {
return(struct tnode *)malloc(sizeof(struct tnode));
}
void treeprint(struct tnode *p) {
if(p != NULL) {
treeprint(p->left);
printf("%4d %s\n", p->count, p->word);
treeprint(p->right);
}
}
void treefree(struct tnode *p) {
if(p != NULL) {
treefree(p->left);
treefree(p->right);
free(p->word);
free(p);
}
return;
}
void arrayprint(struct tnode **p, int x) {
int i = 0;
for(i = 0; i < x; i++)
printf("%4d %s\n", p[i]->count, p[i]->word);
return;
}
char **split(char *string, char *delim) {
char **tokens = NULL;
char *working = NULL;
char *token = NULL;
int idx = 0;
tokens = malloc(sizeof(char *) * MAXTOKENS);
if(tokens == NULL)
return NULL;
working = malloc(sizeof(char) * strlen(string) + 1);
if(working == NULL)
return NULL;
strcpy(working, string);
for(idx = 0; idx < MAXTOKENS; idx++)
tokens[idx] = NULL;
token = strtok(working, delim);
idx = 0;
while((idx < (MAXTOKENS - 1)) && (token != NULL)) {
tokens[idx] = malloc(sizeof(char) * strlen(token) + 1);
if(tokens[idx] != NULL) {
strcpy(tokens[idx], token);
idx++;
token = strtok(NULL, delim);
}
}
free(working);
return tokens;
}
int treetoarray(struct tnode *tree, struct tnode **array) {
static struct tnode **write = NULL;
if(tree == NULL)
return -1;
if(array != NULL)
write = array;
if(tree != NULL) {
*write = tree, write++;
if(tree->left != NULL)
treetoarray(tree->left, NULL);
if(tree->right != NULL)
treetoarray(tree->right, NULL);
}
return 0;
}
int compare(const void *x, const void *y) {
struct tnode * const *a = x;
struct tnode * const *b = y;
if(a == NULL || b == NULL)
return -2;
else {
if((*a)->count < (*b)->count)
return 1;
else if((*a)->count > (*b)->count)
return -1;
else
return 0;
}
}
int tnodecount(struct tnode *p) {
if(p == NULL)
return 0;
else {
if(p->left == NULL && p->right == NULL)
return 1;
else
return(1 + (tnodecount(p->left) + tnodecount(p->right)));
}
}
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAXLINE 1024
#define MAXTOKENS 256
#define MINLEN 3
struct tnode {
char *word;
int count;
struct tnode *left, *right;
};
char **split(char *, char *);
struct tnode *addtree(struct tnode *, char *);
struct tnode *talloc(void);
void treeprint(struct tnode *);
void treefree(struct tnode *);
void arrayprint(struct tnode **, int);
int compare(const void *, const void *);
int tnodecount(struct tnode *);
int treetoarray(struct tnode *, struct tnode **);
int main(void) {
char *delim = ".,:;`'\"+-_(){}[]<>*&^%$#@!?~/|\\= \t\r\n1234567890";
char **tokens = NULL;
char line[MAXLINE];
struct tnode *root = {0};
struct tnode **tarray = {0};
int i, len, lcount, ncount;
i = len = lcount = ncount = 0;
while(fgets(line, MAXLINE, stdin) != NULL) {
len = strlen(line);
if(len < MINLEN)
continue;
else
lcount++;
if(line[len - 1] == '\n')
line[--len] = '\0';
tokens = split(line, delim);
for(i = 0; tokens[i] != NULL; i++)
root = addtree(root, tokens[i]);
for(i = 0; tokens[i] != NULL; i++)
free(tokens[i]);
free(tokens);
}
ncount = tnodecount(root);
if(ncount == 0)
return 1;
/* treeprint(root); */
tarray = malloc(ncount * sizeof(struct tnode *));
if(tarray == NULL)
return 1;
if(treetoarray(root, tarray) == -1)
return 1;
qsort(tarray, ncount, sizeof(struct tnode *), compare);
arrayprint(tarray, ncount);
treefree(root);
return 0;
}
struct tnode *addtree(struct tnode *p, char *w) {
int cond;
if(p == NULL) {
p = talloc();
p->word = strdup(w);
p->count = 1;
p->left = p->right = NULL;
} else if((cond = strcmp(w, p->word)) == 0)
p->count++;
else if(cond < 0)
p->left = addtree(p->left, w);
else
p->right = addtree(p->right, w);
return p;
}
struct tnode *talloc(void) {
return(struct tnode *)malloc(sizeof(struct tnode));
}
void treeprint(struct tnode *p) {
if(p != NULL) {
treeprint(p->left);
printf("%4d %s\n", p->count, p->word);
treeprint(p->right);
}
}
void treefree(struct tnode *p) {
if(p != NULL) {
treefree(p->left);
treefree(p->right);
free(p->word);
free(p);
}
return;
}
void arrayprint(struct tnode **p, int x) {
int i = 0;
for(i = 0; i < x; i++)
printf("%4d %s\n", p[i]->count, p[i]->word);
return;
}
char **split(char *string, char *delim) {
char **tokens = NULL;
char *working = NULL;
char *token = NULL;
int idx = 0;
tokens = malloc(sizeof(char *) * MAXTOKENS);
if(tokens == NULL)
return NULL;
working = malloc(sizeof(char) * strlen(string) + 1);
if(working == NULL)
return NULL;
strcpy(working, string);
for(idx = 0; idx < MAXTOKENS; idx++)
tokens[idx] = NULL;
token = strtok(working, delim);
idx = 0;
while((idx < (MAXTOKENS - 1)) && (token != NULL)) {
tokens[idx] = malloc(sizeof(char) * strlen(token) + 1);
if(tokens[idx] != NULL) {
strcpy(tokens[idx], token);
idx++;
token = strtok(NULL, delim);
}
}
free(working);
return tokens;
}
int treetoarray(struct tnode *tree, struct tnode **array) {
static struct tnode **write = NULL;
if(tree == NULL)
return -1;
if(array != NULL)
write = array;
if(tree != NULL) {
*write = tree, write++;
if(tree->left != NULL)
treetoarray(tree->left, NULL);
if(tree->right != NULL)
treetoarray(tree->right, NULL);
}
return 0;
}
int compare(const void *x, const void *y) {
struct tnode * const *a = x;
struct tnode * const *b = y;
if(a == NULL || b == NULL)
return -2;
else {
if((*a)->count < (*b)->count)
return 1;
else if((*a)->count > (*b)->count)
return -1;
else
return 0;
}
}
int tnodecount(struct tnode *p) {
if(p == NULL)
return 0;
else {
if(p->left == NULL && p->right == NULL)
return 1;
else
return(1 + (tnodecount(p->left) + tnodecount(p->right)));
}
}
No comments:
Post a Comment