Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
259 views
in Technique[技术] by (71.8m points)

c - Valgrind: Where is the memory leak occurring?

I've been trying to write a spell checker program that takes a dictionary and a text file to find the misspelled words as fast as possible. The first version seems to work (albeit slow) except Valgrind gives me over 1000000 errors! I have no idea of what went wrong, except that something went horribly so. I'd like to know why this happens.

Speller (by CS50):

// Implements a spell-checker by CS50

#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "dictionary.h"

// Undefine any definitions
#undef calculate
#undef getrusage

// Default dictionary
#define DICTIONARY "dictionaries/large"

// Prototype
double calculate(const struct rusage *b, const struct rusage *a);

int main(int argc, char *argv[])
{
    // Check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: ./speller [DICTIONARY] text
");
        return 1;
    }

    // Structures for timing data
    struct rusage before, after;

    // Benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // Determine dictionary to use
    char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;

    // Load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // Exit if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.
", dictionary);
        return 1;
    }
    

    // Calculate time to load dictionary
    time_load = calculate(&before, &after);

    // Try to open text
    char *text = (argc == 3) ? argv[2] : argv[1];
    FILE *file = fopen(text, "r");
    if (file == NULL)
    {
        printf("Could not open %s.
", text);
        unload();
        return 1;
    }

    // Prepare to report misspellings
    printf("
MISSPELLED WORDS

");

    // Prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH + 1];

    // Spell-check each word in text
    char c;
    while (fread(&c, sizeof(char), 1, file))
    {
        // Allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == ''' && index > 0))
        {
            // Append character to word
            word[index] = c;
            index++;

            // Ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // Consume remainder of alphabetical string
                while (fread(&c, sizeof(char), 1, file) && isalpha(c));

                // Prepare for new word
                index = 0;
            }
        }

        // Ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // Consume remainder of alphanumeric string
            while (fread(&c, sizeof(char), 1, file) && isalnum(c));

            // Prepare for new word
            index = 0;
        }

        // We must have found a whole word
        else if (index > 0)
        {
            // Terminate current word
            word[index] = '';

            // Update counter
            words++;

            // Check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // Update benchmark
            time_check += calculate(&before, &after);

            // Print word if misspelled
            if (misspelled)
            {
                printf("%s
", word);
                misspellings++;
            }

            // Prepare for next word
            index = 0;
        }
    }

    // Check whether there was an error
    if (ferror(file))
    {
        fclose(file);
        printf("Error reading %s.
", text);
        unload();
        return 1;
    }

    // Close text
    fclose(file);

    // Determine dictionary's size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // Calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // Unload dictionary
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // Abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.
", dictionary);
        return 1;
    }

    // Calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // Report benchmarks
    printf("
WORDS MISSPELLED:     %d
", misspellings);
    printf("WORDS IN DICTIONARY:  %d
", n);
    printf("WORDS IN TEXT:        %d
", words);
    printf("TIME IN load:         %.2f
", time_load);
    printf("TIME IN check:        %.2f
", time_check);
    printf("TIME IN size:         %.2f
", time_size);
    printf("TIME IN unload:       %.2f
", time_unload);
    printf("TIME IN TOTAL:        %.2f

",
           time_load + time_check + time_size + time_unload);

    // Success
    return 0;
}

// Returns number of seconds between b and a
double calculate(const struct rusage *b, const struct rusage *a)
{
    if (b == NULL || a == NULL)
    {
        return 0.0;
    }
    else
    {
        return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
                  (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
                 ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
                  (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
                / 1000000.0);
    }
}

Dictionary uploading and other operations (by me):

// Implements a dictionary's functionality

#include <stdbool.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#include "dictionary.h"

// Represents a node in a hash table
typedef struct node
{
    char word[LENGTH + 1];
    struct node *next;
}
node;

// Number of buckets in hash table
const unsigned long long int N = 50;

// Hash table
node *table[N];

// Returns true if word is in dictionary, else false
bool check(const char *word)
{
    char temp_w[LENGTH];
    unsigned int key = hash(word);
    for(node *temp = table[key]; temp != NULL; temp = temp->next)
    {
        for(int i = 0; i < LENGTH; i++)
        {
            temp_w[i] = tolower(word[i]);
        }
        if(strcmp(temp->word, temp_w) == 0)
        {
            return true;
        }
    }
    return false;
}

// Hashes word to a number
unsigned int hash(const char *word)         //Hashed using djb2
{

    unsigned long hash = 5381;
    int c = *word;
    c = tolower(c);
    while (*word != 0)
    {
        hash = ((hash << 5) + hash) + c;
        c = *word++;
        c = tolower(c);

    }

    return hash % N;
}

// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
    char str[LENGTH];
    char counter[1] = "0";
    FILE *dict = fopen(dictionary,"r");
    node *temp = NULL;
    if(dict == NULL)
    {
        //printf("Error: 
", strerror(errno));
        return false;
    }
    for(int i = 0; i < N; i++)
    {
        table[i] = NULL;
    }
    while(fscanf(dict, "%s", str) == 1)
    {
        unsigned int key = hash(str);
        //int key = rand();
        temp = malloc(sizeof(node));
        if(temp == NULL)
        {
            return false;
        }
        if(table[key] == NULL)
        {
            table[key] = malloc(sizeof(node));
            table[key]->next = NULL;
            strcpy(table[key]->word, str);
            
        }
        else //if(table[key]->next != NULL)
        {
            temp->next = table[key];
            strcpy(temp->word, str);
            table[key] = temp;
        }
    }
    fclose(dict);
    return true;
}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
    long long int words = 0;
    for(int i = 0; i < N; i++)
    {
        /*if(strcmp(table[i]->word, "0") != 0 && table[i]->next == NULL)
        {
            words++;
            continue;
        }
        else*/
        if(table[i] != NULL)
        {
            for(node *temp = table[i]; temp != NULL; temp = temp -> next)
            {
                words++;
            }
        }
    }
    return words;
}

// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
    node *temp1 = NULL;
    node *temp2 = NULL;
    for(int i = 0; i < N; i++)
    {
        temp1 = table[i];
        while(temp1 != NULL)
        {
            if(temp1->next != NULL)
            {
                temp2 = temp1 -> next;
                free(temp1);
                temp1 = temp2;
            }
            else free(temp1);
        }
        free(temp2);
        free(table[i]);
    }
    return true;
}

Valgrind error message:

 ~/pset5/speller/ $ valgrind ./speller texts/constitution.txt
    ==472== Memcheck, a memory error detector
    ==472== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
    ==472== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
    ==472== Command: ./speller texts/constitution.txt
    ==472== 
    
MISSPELLED WORDS

==472== Conditional jump or move depends on uninitialised value(s)
==472==    at 0x49D9143: tolower (ctype.c:46)
==472==    by 0x4019E5: check (dictionary.c:35)
==472==    by 0x4015FB: main (speller.c:114)
==472==  Uninitialised value was created by a stack allocation
==472==    at 0x4011F4: main (speller.c:21)
==472== 
==472== Use of uninitialised value of size 8
==472==    at 0x49D9157: tolower (ctype.c:46)
==472==    by 0x49D9157: tolower (ctype.c:44)
==472==    by 0x4019E5: check (dictionary.c:35)
==472==    by 0x4015FB: main (speller.c:114)
==472==  Uninitialised value was created by a stack allocation
==472==    at 0x4011F4: main (speller.c:21)
==472== 
USConstitution
http
usconstitution
const
html
==472== Invalid read of size 8
==472==    at 0x401D4C: unload (dictionary.c:139)
==472==    by 0x401758: main (speller.c:154)
==472==  Address 0x4b9cd50 is 48 bytes inside a block of size 56 free'd
==472==    at 0x483CA3F: free (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==472==    by 0x401D87: unload (dictionary.c:145)
==472==    by 0x401758: main (speller.c:154)
==472==  Block was alloc'd at
==472==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==472==    by 0x401BCE: load (dictionary.c:90)
==472==    by 0x4012BE: main (speller.c:40)
==472== 
==472== Invalid free() / delete / delete[] / realloc()
==472==    at 0x483CA3F: free (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==472==    by 0x401D87: unload (dictionary.c:145)
==472==    by 0x401758: main (speller.c:154)
==472==  Address 0x4b9cd20 is 0 bytes inside a block of size 56 free'd
==472==    at 0x483CA3F: free (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==472==    by 0x401D87: unload (dictionary.c:145)
==472==    by 0x401758: main (speller.c:154)
==472==  Block was alloc'd

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

The only thing I found so far is that you free temp2 in unload() when you clearly already freed it by calling free on temp1.


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...