Welcome to the OGeek Q&A Community for programmers and developers - Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
100 views
in Technique[技术] by (71.8m points)

node.js - .push is not a function in web crawler

I am writing a Node.js web crawler class, and I have encountered the following error: this.textInvertedIndex[word].push is not a function. Upon further inspection, I realised that for some reason this.textInvertedIndex[word] had become a native function, function Object({ [native code] }). For the first few iterations, console-logging this.textInvertedIndex showed that everything was fine: it was an object of arrays. But then this error suddenly occurred. Is there any part of the code where I am implicitly rewriting textInvertedIndex?

Here is the relevant class:

/**
 * Web crawler that fetches pages batch by batch and accumulates in-memory
 * inverted indexes for text and images.
 *
 * Relies on `Website`, `Query` and `getImageScore`, which are defined outside
 * this block; their exact contracts are assumed from how they are used here.
 *
 * All word/URL-keyed dictionaries are created with a null prototype. With a
 * plain `{}` a crawled word such as "constructor" or "toString" resolves to
 * the inherited `Object.prototype` member, which is truthy but is not an
 * array, so `.push` fails ("push is not a function"); a word of "__proto__"
 * would even replace the object's prototype.
 *
 * @constructor
 * @param {string[]} queue - URLs to fetch in the first batch.
 * @param {number} maxIndexSize - Number of sites to index before stopping.
 */
function Crawler(queue, maxIndexSize) {
  // The chunk count is chosen as close as possible to this value.
  const TARGET_CHUNK_COUNT = 30;

  // Dictionary without inherited keys, safe for arbitrary string keys.
  const createDictionary = () => Object.create(null);

  // Appends `entry` to the list stored under `key`, creating the list on
  // first use.
  const appendEntry = (index, key, entry) => {
    const bucket = index[key];

    if (bucket) {
      bucket.push(entry);
    } else {
      index[key] = [entry];
    }
  };

  this.queue = queue;
  this.maxIndexSize = maxIndexSize;

  /**
   * Finds the divisor of `maxIndexSize` that is closest to 30 (the smaller
   * divisor wins a tie).
   *
   * @returns {number|undefined} The divisor, or `undefined` when
   *   `maxIndexSize` has no divisor in the range 1..maxIndexSize (e.g. 0).
   */
  this.findChunks = () => {
    let currentChunk;
    let minimumDistance = Infinity;

    for (let i = 1; i <= this.maxIndexSize; i++) {
      if (this.maxIndexSize % i !== 0) {
        continue;
      }

      const newDistance = Math.abs(i - TARGET_CHUNK_COUNT);

      // Divisors are visited in ascending order, so once the distance stops
      // shrinking no later divisor can be closer.
      if (newDistance >= minimumDistance) {
        return currentChunk;
      }

      minimumDistance = newDistance;
      currentChunk = i;
    }

    // Reached when the closest divisor is also the largest one (for example
    // maxIndexSize <= 30); previously this case fell through to `undefined`.
    return currentChunk;
  };

  this.chunks = this.findChunks();
  this.chunkSize = this.maxIndexSize / this.chunks;
  this.totalWordOccurances = createDictionary();
  this.imageInvertedIndex = createDictionary();
  this.textInvertedIndex = createDictionary();
  this.images = [];
  this.sites = [];
  this.seen = createDictionary();

  /**
   * Indexes one fetched page: records the site, marks the URL as seen, and
   * adds an entry per distinct word to the text index and, for every
   * object-typed content item (treated as an image), to the image index.
   *
   * @param {string} url - Address of the page.
   * @param {*} html - Page source, passed through to `Website`.
   */
  this.write = (url, html) => {
    const documentId = this.sites.length;
    const website = new Website(url, html);
    const title = website.title();
    const content = website.content(title);
    // Non-object items are words; object items are handled as images below.
    const words = content.filter((item) => typeof item !== "object");
    const wordsLength = words.length;
    const query = new Query(words);
    const individualWords = query.individualize(words);

    this.seen[url] = true;

    this.sites.push({
      url,
      title,
      description: website.description()
    });

    for (const word of individualWords) {
      const normalizedTf = query.count(word) / wordsLength;

      appendEntry(this.textInvertedIndex, word, { documentId, normalizedTf });

      // Incremented once per document in which the word appears.
      this.totalWordOccurances[word] = (this.totalWordOccurances[word] || 0) + 1;
    }

    for (let i = 0; i < content.length; i++) {
      const item = content[i];

      if (typeof item !== "object") {
        continue;
      }

      const imageId = this.images.length;

      this.images.push(item);

      for (const word of individualWords) {
        const imageScore = getImageScore(i, word, content);

        appendEntry(this.imageInvertedIndex, word, { imageId, imageScore });
      }
    }
  };

  /**
   * Crawls until `maxIndexSize` sites are indexed or there are no more URLs
   * to visit. Each round requests every queued URL in parallel, indexes the
   * successful responses, and queues the not-yet-seen links they contain.
   *
   * @returns {Promise<void>} Resolves when crawling stops; rejects if a
   *   request promise rejects.
   */
  this.crawl = async () => {
    // Also stop on an empty queue: with nothing left to fetch the site count
    // can never reach maxIndexSize and the loop would spin forever.
    // Assumes the queue is always an array (it is used with `.map` below).
    while (this.sites.length < this.maxIndexSize && this.queue.length > 0) {
      let nextQueue = [];
      const websitesUnfiltered = await Promise.all(
        this.queue.map((url) => new Website(url).request())
      );
      const websitesToAdd = this.maxIndexSize - this.sites.length;
      // `request()` signals a failed fetch with the string "Failure".
      const websites = websitesUnfiltered
        .filter((message) => message !== "Failure")
        .slice(0, websitesToAdd);

      for (const site of websites) {
        const url = site.url;
        const htmlCode = site.htmlCode;
        const website = new Website(url, htmlCode);

        this.write(url, htmlCode);

        nextQueue = nextQueue.concat(website.urls());
      }

      // presumably `individualize()` removes duplicate URLs — verify against Query
      this.queue = new Query(nextQueue.filter((url) => !this.seen[url]))
        .individualize();
    }
  };
}

Called like this

const crawler = new Crawler(["https://stanford.edu/"], 25000000);

// crawl() is async: handle a rejection explicitly instead of leaving the
// returned promise floating.
crawler.crawl().catch((error) => {
  console.error("Crawl failed:", error);
  process.exitCode = 1;
});
question from:https://stackoverflow.com/questions/65617353/push-is-not-a-function-in-web-crawler

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

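this.textInvertedIndex = {}; defines an Object, and push is not a method of Object. You can change it to an array by defining it as this.textInvertedIndex = [];. Otherwise, you can add key/value entries to the object as it is defined, like this: this.textInvertedIndex[key] = value;. Note, however, that the failing call in the question is made on this.textInvertedIndex[word], not on the object itself: for a word such as "constructor", the lookup on a plain {} returns the inherited Object.prototype.constructor function (the native function Object seen in the log), which is truthy but has no push method.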


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

1.4m articles

1.4m replies

5 comments

57.0k users

...