                     SORTING AND SEARCHING WITH C


                            Matthew Probert

                           Servile  Software

The ANSI C standard defines qsort(), a function for sorting a table of 
data. The function follows the format; 

qsort(void *base,size_t elements,size_t width,int (*cmp)(void *, 
      void *)); 

The following short program illustrates the use of qsort() with a 
character array. 

#include <string.h> 

main()
{ 
    int n; 
    char data[10][20]; 

    /* Initialise some arbirary data */ 

    strcpy(data[0],"RED"); 
    strcpy(data[1],"BLUE"); 
    strcpy(data[2],"GREEN"); 
    strcpy(data[3],"YELLOW"); 
    strcpy(data[4],"INDIGO"); 
    strcpy(data[5],"BROWN"); 
    strcpy(data[6],"BLACK"); 
    strcpy(data[7],"ORANGE"); 
    strcpy(data[8],"PINK"); 
    strcpy(data[9],"CYAN"); 

    /* Sort the data table */ 
    qsort(data[0],10,20,strcmp); 

    /* Print the data table */ 
    for(n = 0; n < 10; n++) 
        puts(data[n]); 
} 


Here is a program that implements the shell sort algorithm (this one 
is based on the routine in K & R), which sorts arrays of pointers 
based upon the data pointed to by the pointers; 

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 

#define LINELEN        80 
#define MAXLINES     2000 

char *lines[MAXLINES]; 
int lastone; 

void SHELL(void);

void SHELL() 
{ 
    /* SHELL Sort Courtesy of K & R */ 

    int gap; 
    int i; 
    int j; 
    char temp[LINELEN]; 

    for(gap = lastone / 2; gap > 0; gap /= 2) 
    for(i = gap; i < lastone; i++) 
        for(j = i - gap; j >= 0 && strcmp(lines[j] , lines[j + gap]) > 
           0; j -= gap)
        { 
            strcpy(temp,lines[j]); 
            strcpy(lines[j] , lines[j + gap]); 
            strcpy(lines[j + gap] , temp); 

        } 
} 

void main(int argc, char *argv[]) 
{ 
    FILE *fp; 
    char buff[100]; 
    int n; 

    /* Check command line parameter has been given */ 
    if (argc != 2)
    { 
        printf("\nError: Usage is CSORT file"); 
        exit(0); 
    } 

    /* Attempt to open file for updating */ 
    fp = fopen(argv[1],"r+"); 
    if (fp == NULL)
    { 
        printf("\nError: Unable to open %s",argv[1]); 
        exit(0); 
    } 

    /* Initialise element counter to zero */ 
    lastone = 0; 

    /* Read file to be sorted */ 
    while((fgets(buff,100,fp)) != NULL)
    { 
        /* Allocate memory block*/ 
        lines[lastone] = malloc(LINELEN); 
        if (lines[lastone] == NULL)
        { 
            printf("\nError: Unable to allocate memory"); 
            fclose(fp);
            exit(0); 
        } 
        strcpy(lines[lastone],buff); 
        lastone++; 

        if (lastone > MAXLINES)
        { 
            printf("\nError: Too many lines in source file"); 
            exit(0); 
        } 
    } 
    /* Call sort function */ 
    SHELL(); 

    /* Close file */ 
    fclose(fp); 

    /* Reopen file in create mode */ 
    fp = fopen(argv[1],"w+"); 

    /* Copy sorted data from memory to disk */ 
    for(n = 0; n < lastone; n++) 
        fputs(lines[n],fp); 

    /* Close file finally */ 
    fclose(fp); 

    /* Return to calling program */ 
    return(1); 
} 


If we want to use qsort() with a table of pointers we have to be a bit 
more clever than usual. 

This example uses the colours again, but this time they are stored in 
main memory and indexed by a table of pointers. Because we have a 
table of pointers to sort there are two differences between this 
program's qsort() and the previous one; 

First we can't use strcmp() as the qsort() comparison function, 
secondly the width of the table being sorted is sizeof(char *), that 
is the size of a character pointer. 

Notice the comparison function cmp() that receives two parameters, 
both are pointers to a pointer. qsort() sends to this function the 
values held in data[], which are in turn pointers to the data. So we 
need to use this indirection to locate the data, otherwise we would be 
comparing the addresses at which the data is held rather than the data 
itself! 

#include <alloc.h> 
#include <string.h> 

/* Function prototype for comparison function */ 
int (cmp)(char **,char **); 

int cmp(char **s1, char **s2) 
{ 
    /* comparison function using pointers to pointers */ 
    return(strcmp(*s1,*s2)); 
} 

main()
{ 
    int n; 
    char *data[10]; 

    for(n = 0; n < 10; n++) 
        data[n] = malloc(20); 

    strcpy(data[0],"RED"); 
    strcpy(data[1],"BLUE"); 
    strcpy(data[2],"GREEN"); 
    strcpy(data[3],"YELLOW"); 
    strcpy(data[4],"INDIGO"); 
    strcpy(data[5],"BROWN"); 
    strcpy(data[6],"BLACK"); 
    strcpy(data[7],"ORANGE"); 
    strcpy(data[8],"PINK"); 
    strcpy(data[9],"CYAN"); 

    /* The data table is comprised of pointers */ 
    /* so the call to qsort() must reflect this */ 
    qsort(data,10,sizeof(char *),cmp); 

    for(n = 0; n < 10; n++) 
        puts(data[n]); 
} 

The quick sort is a fast sorting algorithm that works by subdividing 
the data table into two sub-tables and then subdividing the sub-
tables. As it subdivides the table, so it compares the elements in the 
table and swaps them as required. This has the disadvantage that it 
requires a lot of stack space.

The following program implements the quick sort algorithm, which is 
usually already used by qsort(); 

#include <string.h> 

#define MAXELE  2000 

char data[10][20]; 
int lastone; 

void QKSORT() 
{ 
    /* Implementation of QUICKSORT algorithm */ 

    int i; 
    int j; 
    int l; 
    int p; 
    int r; 
    int s; 
    char temp[100]; 
    static int sl[MAXELE][2]; 

    /* sl[] is an index to the sub-table */ 

    l = 0; 
    r = lastone; 
    p = 0; 

    do
    { 
        while(l < r)
        { 
            i = l; 
            j = r; 
            s = -1; 

            while(i < j)
            { 
                if (strcmp(data[i],data[j]) > 0)
                { 
                    strcpy(temp,data[i]); 
                    strcpy(data[i],data[j]); 
                    strcpy(data[j],temp); 
                    s = 0 - s; 
                } 
            
                if (s == 1) 
                    i++; 
                else 
                    j--; 
            } 
           
            if (i + 1 < r)
            { 
                p++; 
                sl[p][0] = i + 1; 
                sl[p][1] = r; 
            } 
            r = i - 1; 
        }       
        if (p != 0)
        { 
            l = sl[p][0]; 
            r = sl[p][1]; 
            p--; 
        } 
    }
    while(p > 0); 
} 

main()
{ 
    int n; 

    /* Initialise arbitrary data */ 
    strcpy(data[0],"RED"); 
    strcpy(data[1],"BLUE"); 
    strcpy(data[2],"GREEN"); 
    strcpy(data[3],"YELLOW"); 
    strcpy(data[4],"INDIGO"); 
    strcpy(data[5],"BROWN"); 
    strcpy(data[6],"BLACK"); 
    strcpy(data[7],"ORANGE"); 
    strcpy(data[8],"PINK"); 
    strcpy(data[9],"CYAN"); 

    /* Set last element indicator */ 
    lastone = 9; 

    /* Call quick sort function */ 
    QKSORT(); 

    /* Display sorted list */ 
    for(n = 0; n < 10; n++) 
        puts(data[n]); 

} 

A table sorted into ascending order can be searched with bsearch(), 
this takes the format; 

bsearch(key,base,num_elements,width,int (*cmp)(void *, void *)); 

bsearch() returns a pointer to the first element in the table that 
matches the key, or zero if no match is found. 

Or you can write your own binary search function thus; 

int BSRCH(char *key, void *data, int numele, int width) 
{ 
    /* A binary search function returning one if found */ 
    /* Zero if not found */ 

    int bp; 
    int tp; 
    int mp; 
    int result; 
    char *p; 

    bp = 0; 
    tp = numele; 
    mp = (tp + bp) / 2; 

    /* Locate element mp in table by assigning pointer to start */ 
    /* and incrementing it by width * mp */ 
    p = data; 
    p += width * mp; 

    while((result = strcmp(p,key)) != 0)
    { 
        if (mp >= tp)
            /* Not found! */ 
            return(0); 
        if (result < 0) 
            bp = mp + 1; 
        else 
            tp = mp - 1; 

        mp = (bp + tp) / 2; 
        p = data; 
        p += width * mp; 
    } 
    return(1); 
} 

void main()
{ 
    int result; 
    char data[10][20]; 

    /* Initialise some arbirary data */ 

    strcpy(data[0],"RED"); 
    strcpy(data[1],"BLUE"); 
    strcpy(data[2],"GREEN"); 
    strcpy(data[3],"YELLOW"); 
    strcpy(data[4],"INDIGO"); 
    strcpy(data[5],"BROWN"); 
    strcpy(data[6],"BLACK"); 
    strcpy(data[7],"ORANGE"); 
    strcpy(data[8],"PINK"); 
    strcpy(data[9],"CYAN"); 

    /* Sort the data table */ 
    qsort(data[0],10,20,strcmp); 

    result = BSRCH("CYAN",data[0],10,20); 

    printf("\n%s\n",(result == 0) ? "Not found" : "Located okay"); 
} 

There are other sorting algorithms as well, some fast, some which 
require very little memory. This program incorporates the QUICK SORT, 
BUBBLE SORT, FAST BUBBLE SORT, INSERTION SORT and SHELL SORT for 
comparing how each performs on a random 1000 item string list; 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char data[1000][4];
char save[1000][4];

int lastone;

void INITDATA(void);
void QKSORT(void);
void SHELL(void);
void BUBBLE(void);
void FBUBBLE(void);
void INSERTION(void);
void MKDATA(void);

void QKSORT()
{
    /* Implementation of QUICKSORT algorithm */

    int i;
    int j;
    int l;
    int p;
    int r;
    int s;
    char temp[20];
    static int sl[1000][2];

    l = 0;
    r = lastone;
    p = 0;

    do
    {
        while(l < r)
        {
            i = l;
            j = r;
            s = -1;

            while(i < j)
            {
                if (strcmp(data[i],data[j]) > 0)
                {
                    strcpy(temp,data[i]);
                    strcpy(data[i],data[j]);
                    strcpy(data[j],temp);
                    s = 0 - s;
                }

                if (s == 1)
                    i++;
                else
                    j--;
            }

            if (i + 1 < r)
            {
                p++;
                sl[p][0] = i + 1;
                sl[p][1] = r;
            }
            r = i - 1;
        }
        if (p != 0)
        {
            l = sl[p][0];
            r = sl[p][1];
            p--;
        }
    }
    while(p > 0);
}

void SHELL()
{
    /* SHELL Sort Courtesy of K & R */

    int gap;
    int i;
    int j;
    char temp[20];

    for(gap = lastone / 2; gap > 0; gap /= 2)
    for(i = gap; i < lastone; i++)
        for(j = i - gap; j >= 0 && strcmp(data[j] , data[j + gap]) > 0;
              j -= gap)
        {
            strcpy(temp,data[j]);
            strcpy(data[j] , data[j + gap]);
            strcpy(data[j + gap] , temp);
        }
}

void BUBBLE()
{
    int a;
    int b;
    char temp[20];

    for(a = lastone; a >= 0; a--)
    {
        for(b = 0; b < a; b++)
        {
            if(strcmp(data[b],data[b + 1]) > 0)
            {
                strcpy(temp,data[b]);
                strcpy(data[b] , data[b + 1]);
                strcpy(data[b + 1] , temp);
            }       
        }
    }
}

void FBUBBLE()
{
    /* bubble sort with swap flag*/

    int a;
    int b;
    int s;
    char temp[20];

    s = 1;

    for(a = lastone; a >= 0 && s == 1; a--)
    {
        s = 0;
        for(b = 0; b < a; b++)
        {
            if(strcmp(data[b],data[b + 1]) > 0)
            {
                strcpy(temp,data[b]);
                strcpy(data[b] , data[b + 1]);
                strcpy(data[b + 1] , temp);
                s = 1;
            }
        }
    }
}

void INSERTION()
{
    int a;
    int b;
    char temp[20];

    for(a = 0; a < lastone; a++)
    {
        b = a;
        strcpy(temp,data[a + 1]);
        while(b >= 0)
        {
            if (strcmp(temp,data[b]) < 0)
            {
                strcpy(data[b+1],data[b]);
                b--;
            }
            else
                break;
        }
        strcpy(data[b+1],temp);
    }
}

void MKDATA()
{
    /* Initialise arbitrary data */
    /* Uses random(), which is not ANSI C */
    /* Returns a random number between 0 and n - 1 */

    int n;
    for(n = 0; n < 1000; n++)
        sprintf(save[n],"%d",random(1000));
}

void INITDATA()
{
    int n;

    for(n = 0 ; n < 1000; n++)
        strcpy(data[n],save[n]);
}

void main()
{
    MKDATA();

    /* Initialise arbitrary data */
    INITDATA();

    /* Set last element indicator */
    lastone = 999;

    /* Call quick sort function */
    QKSORT();


    /* Initialise arbitrary data */
    INITDATA();

    /* Set last element indicator */
    lastone = 1000;

    /* Call shell sort function */
    SHELL();

    /* Initialise arbitrary data */
    INITDATA();

    /* Set last element indicator */
    lastone = 999;

    /* Call bubble sort function */
    BUBBLE();

    /* Initialise arbitrary data */
    INITDATA();

    /* Set last element indicator */
    lastone = 999;

    /* Call bubble sort with swap flag function */
    FBUBBLE();

    /* Initialise arbitrary data */
    INITDATA();

    /* Set last element indicator */
    lastone = 999;

    /* Call insertion sort function */
    INSERTION();
}

Here are the profiler results of the above test program run on 1000 
and 5000 random items; 

STRING SORT - 1000 RANDOM ITEMS

FBUBBLE         26.436 sec  41% |********************************************
BUBBLE          26.315 sec  41% |*******************************************
INSERTION       10.210 sec  15% |***************
SHELL           0.8050 sec   1% |*
QKSORT          0.3252 sec  <1% |

STRING SORT - 5000 RANDOM ITEMS

FBUBBLE         563.70 sec  41% |********************************************
BUBBLE          558.01 sec  41% |********************************************
INSERTION       220.61 sec  16% |***************
SHELL           5.2531 sec  <1% |
QKSORT          0.8379 sec  <1% |


