C programming language - ch6

Home

Table of Contents

http://www.c4learn.com/c-programming/c-pointer-invalid-operations/

1 Compiling the examples

Compile with the standard ./configure ; make

Additionally add the following make options to turn off optimizations and enable debugging symbols

./configure

make CXXFLAGS='-g3 -Wall -O0 '  CFLAGS='-g3 -Wall -O0 '

2 Autotools template example

Please check the Autotools template example

3 examples

3.1 keywordcount.c

C keyword count - Array version

#include <stdio.h>
#include <ctype.h>
#include <string.h>

// getch  ungetch
#include "getoper.h"

#define MAXWORD 100

#define NKEYS (sizeof keytab / sizeof(struct key))

int getword(char *, int);

struct key {
    char *word;
    int count;
};

struct key keytab[] = {
    "auto", 0,
    "break", 0,
    "case", 0,
    "char", 0,
    "const", 0,
    "continue", 0,
    "default", 0,
    /* ... */
    "unsigned", 0,
    "void", 0,
    "volatile", 0,
    "while", 0
};

int binsearch(char *word, struct key *, int n);

/* count C keywords  */
main()
{

    int n;
    char word[MAXWORD];

    while (getword(word, MAXWORD) != EOF)
        if (isalpha(word[0]))
            if ((n = binsearch(word, keytab, NKEYS)) >= 0 )
                keytab[n].count++;
    for (n = 0; n < NKEYS; n++)
        if (keytab[n].count > 0)
            printf ("%4d %s\n", keytab[n].count, keytab[n].word );

    return 0;

}



/* binsearch: find word in tab[0] ... tab[n-1]   */
int binsearch(char *word, struct key tab[], int n)
{
    int cond;
    int low, high, mid;

    low = 0;
    high = n -1;
    while (low <= high) {
        mid = (low+high) / 2;
        if ((cond = strcmp(word, tab[mid].word)) < 0)
            high = mid -1;
        else if (cond > 0)
            low = mid + 1;
        else
            return mid;
    }
    return -1;

}

3.1.1 getoper.h

#include <ctype.h>

#ifndef _GETOPER_H
#define _GETOPER_H


#define MAXOP 100  /* max size of operand or operator */


#define NUMBER '0'  /* signal that a number was found */

#define BUFSIZE 100


int getoper(char s[]);
int getch2(void);
void ungetch2(int);

/* getword:  get next word or character from input */
int getword(char *word, int lim);


#endif /*  _GETOPER_H  */

3.1.2 getoper.c

#include <stdio.h>
#include <ctype.h>
#include "getoper.h"


/*
 External static provides a way to hide names like buf and bufp in the getch-ungetch combination, which must be external so they can be shared, yet which should not be visible to users of getch and ungetch.
In this example, no other routine outside this file will be able to access buf and bufp, and those names will not conflict with the same names in other files of the same program.
*/

static char buf[BUFSIZE]; /* buffer for ungetch2 */
static int bufp = 0;     /* next free position in buf */


/* getop: get next character or numeric operand */
int getoper(char s[])
{

    int i, c;

    while ((s[0] = c = getch2()) == ' ' || c == '\t')
        ;
    s[1] = '\0';
    if (!isdigit(c) && c != '.')
        return c;   /* not a number */
    i = 0;
    if (isdigit(c)) /* collect integer part */
        while (isdigit(s[++i] = c = getch2()) )
            ;
    if (c == '.') /* collect fraction part */
        while (isdigit(s[++i] = c = getch2()))
            ;
    s[i] = '\0';
    if (c != EOF)
        ungetch2(c);
    return NUMBER;

}

int getch2(void)  /* get a (possibly pushed-back) character */
{
    return (bufp > 0) ? buf[--bufp] : getchar();
}

/*
  The standar library includes a function ungetch that provides one character of pushback;  We have used an array for the pushback, rather than a single character, to illustrate a more general approach.
 */

void ungetch2(int c)  /* push character back on input */
{
    if (bufp >= BUFSIZE )
        printf ("ungetch2: too many characters\n");
    else
        buf[bufp++] = c;

}




/* getword:  get next word or character from input */
int getword(char *word, int lim)
{
    int c;
    char *w = word;

    while (isspace(c = getch2()))
        ;
    if (c != EOF)
        *w++ = c;
    if (!isalpha(c)) {
        *w = '\0';
        return c;
    }
    for ( ; --lim > 0; w++)
        if (!isalnum(*w = getch2())) {
            ungetch2(*w);
            break;
        }
    *w = '\0';
    return word[0];
}

3.2 keywordcount2.c

C keyword count - Pointers version

/* count C keywords; pointer version  */

#include <stdio.h>
#include <ctype.h>
#include <string.h>

// getch  ungetch
#include "getoper.h"

#define MAXWORD 100

#define NKEYS (sizeof keytab / sizeof(struct key))

int getword(char *, int);

struct key {
    char *word;
    int count;
};

struct key keytab[] = {
    "auto", 0,
    "break", 0,
    "case", 0,
    "char", 0,
    "const", 0,
    "continue", 0,
    "default", 0,
    /* ... */
    "unsigned", 0,
    "void", 0,
    "volatile", 0,
    "while", 0
};

struct key *binsearch(char *word, struct key *, int n);


int main(int argc, char *argv[])
{
    char word[MAXWORD];

    struct key *p;

    while (getword(word, MAXWORD) != EOF)
        if (isalpha(word[0]))
            if ((p=binsearch(word, keytab, NKEYS)) != NULL)
                p->count++;
    for (p = keytab; p < keytab + NKEYS; p++)
        if (p->count > 0)
            printf ("%4d %s\n", p->count, p->word);

    return 0;
}


/* binsearch: find word in tab[0] ... tab[n-1] */
struct key *binsearch(char *word, struct key *tab, int n)
{

    int cond;
    struct key *low = &tab[0];
    struct key *high = &tab[n];
    struct key *mid;

    while (low < high) {

        mid = low + (high-low) / 2;
        if ((cond = strcmp(word, mid->word)) < 0 )
            high = mid;
        else if (cond > 0)
            low = mid + 1;
        else
            return mid;

    }
    return NULL;

}

There are sevearl things worthy of note here. First, the declaration of binsearch must indicate that it returns a pointer to struct key instead of an integer; if binsearch finds the word, it returns a pointer to it; if it fails, it returns NULL.

Second, the elements of keytab are now accessed by pointers. This requires significant changes in binsearch.

The initializers for low and high are now pointers to the beginning and just past the end of the table.

The computation of the middle element can no longer be simply

mid = (low+high) / 2;   /* WRONG */

because the addition of pointers is illegal. Subtraction is legal, however, so high - low is the number of elements, and thus

mid = low + (high-low) /2;

sets mid to the element halfway between low and high.

The most important change is to adjust the alrogithm to make sure that it does not generate an illegal pointer or attempt to access an element outside the array. The problem is that &tab[-1] and &tab[n] are both outside the limits of the array tab. The former is strictly illegal, and it is illegal to dereference that latter. The language definition does guarantee, however, that pointer arithmetic that involves the first element beyond the end of an array (that is, &tab[n]) will work correctly.

4 Self-referential structures

Binary tree

The tree contains one "node" per distinct word; each node contains:

  • A pointer to the next of the word,
  • A count of the number of occurrences,
  • A pointer to the left child node,
  • A pointer to the right child node

No node may have more than two children; it might have only zero or one.

The nodes are maintained so that at any node the left subtree contains only words that are lexicographically less than the word at the node, and the right subtree contains only words that are greater.

#include <stdio.h>
#include <ctype.h>   /* isalpha */
#include <string.h>  /* strcmp */
#include <stdlib.h>  /* malloc  */
#include "getoper.h"

#define MAXWORD  100

struct tnode {           /* the tree node: */
    char *word;          /* points to the text */
    int count;           /* number of occurrences */
    struct tnode *left;  /* left child */
    struct tnode *right; /* right child */
};


struct tnode *addtree(struct tnode *, char *);
void treeprint(struct tnode *);
struct tnode *talloc(void);
char *strdup2(char *);


int main(int argc, char *argv[])
{
    struct tnode *root;
    char word[MAXWORD];

    root = NULL;
    while (getword(word, MAXWORD) != EOF )
        if (isalpha(word[0]))
            root = addtree(root, word);
    treeprint(root);

    return 0;
}

/* addtree: add a node with w, at or below p */
struct tnode *addtree(struct tnode *p, char *w)
{
    int cond;

    if (p == NULL) {    /* a new word has arrived */
        p = talloc();  /* make a new node */
        p->word = strdup2(w);
        p->count = 1;
        p->left = p->right = NULL;
    } else if ((cond = strcmp(w, p->word)) == 0)
        p->count++;  /* repeated word */
    else if (cond < 0)  /* less than into left subtree */
        p->left = addtree(p->left, w);
    else     /* greater than into right subtree */
        p->right = addtree(p->right, w);
    return p;

}



/* treeprint: in-order print of tree p */
void treeprint(struct tnode *p)
{
    if (p != NULL) {
        treeprint(p->left);
        printf ("%4d %s\n", p->count, p->word);
        treeprint(p->right);
    }

}

/*
  A practical note: if the tree becomes "unbalanced" because the words don't arrive in random order, the running time of the program can grow too much. As a worst case, if the words are already in order, this program does an expensive simulation of linear search.
 */


/* talloc: make a tnode */
struct tnode *talloc(void)
{
  return (struct tnode *) malloc(sizeof(struct tnode));
}


/*  strdup2 copies the string given ty its argument into a safe place, obtained by a call on malloc
NOTE:  there is already a strdup function in string.h  that does the same thing.
*/
char *strdup2(char *s)
{
    char *p;

    p = (char *) malloc(strlen(s)+1);  /* +1 for '\0' */
    if (p != NULL)
        strcpy(p, s);
    return p;

}

5 Table Lookup

6 Typedef

Notice that the type being declared in a typedef appears in the position of a variable name

typedef int Length;


Length len, maxlen;
Length *lengths[];

typedef char *String;


typedef struct tnode *Treeptr;

typedef struct tnode {
  char *word;
  int count;
  struct tnode *left;
  struct tnode *right;
} Treenode;

Treeptr talloc(void)
{
  return (Treeptr) malloc(sizeof(Treenode));
}

Creates type PFI, pointer to function (of two char * arguments) returning int

typedef int (*PFI) (char *, char *)

PFI strcmp, numcmp;

Author: root

Created: 2019-05-22 Wed 17:54

Emacs 25.2.2 (Org mode 8.2.10)

Validate