I am writing a node JS web crawler class, and I have encountered the following error, this.textInvertedIndex[word].push is not a function
. Upon further inspection I realised that for some reason this.textInvertedIndex[word]
was written as a native object, function Object({ [native code] })
. For the first few iterations, by console logging this.textInvertedIndex everything seemed fine as it was an object of arrays. But then suddenly this error occurred. Is there any part of the code where I am implicitly rewriting textInvertedIndex?
Here is the relevant class:
function Crawler(queue, maxIndexSize) {
this.queue = queue;
this.maxIndexSize = maxIndexSize;
this.findChunks = () => {
let currentChunk;
let minimumDistance = Infinity;
for (i = 1; i <= this.maxIndexSize; i++) {
if (this.maxIndexSize % i === 0) {
const newDistance = Math.abs(i - 30);
if (newDistance < minimumDistance) {
minimumDistance = newDistance;
currentChunk = i;
} else {
return currentChunk
};
};
};
};
this.chunks = this.findChunks();
this.chunkSize = this.maxIndexSize / this.chunks;
this.totalWordOccurances = {};
this.imageInvertedIndex = {};
this.textInvertedIndex = {};
this.images = [];
this.sites = [];
this.seen = {};
this.write = (url, html) => {
const documentId = this.sites.length;
const website = new Website(url, html);
const title = website.title();
const content = website.content(title);
const words = content.filter(item => typeof item !== "object");
const wordsLength = words.length;
const query = new Query(words);
const individualWords = query.individualize(words);
this.seen[url] = true;
this.sites.push({
url,
title,
description: website.description()
});
for (word of individualWords) {
const normalizedTf = query.count(word) / wordsLength;
const textInvertedIndexEntry = {
documentId,
normalizedTf
};
if (this.textInvertedIndex[word]) {
this.textInvertedIndex[word].push(textInvertedIndexEntry);
} else {
this.textInvertedIndex[word] = [textInvertedIndexEntry];
};
if (this.totalWordOccurances[word]) {
this.totalWordOccurances[word] += 1;
} else {
this.totalWordOccurances[word] = 1;
};
};
for (i = 0; i < content.length; i++) {
const item = content[i];
if (typeof item === "object") {
const imageId = this.images.length;
this.images.push(item);
for (word of individualWords) {
const imageScore = getImageScore(i, word, content);
const imageInvertedIndexEntry = {
imageId,
imageScore
};
if (this.imageInvertedIndex[word]) {
this.imageInvertedIndex[word].push(imageInvertedIndexEntry);
} else {
this.imageInvertedIndex[word] = [imageInvertedIndexEntry];
};
};
};
};
};
this.crawl = async () => {
while (this.sites.length !== this.maxIndexSize) {
let nextQueue = [];
const websitesUnfiltered = await Promise.all(this.queue.map((url) => {
const website = new Website(url);
return website.request();
}));
const websitesToAdd = this.maxIndexSize - this.sites.length;
let websites = websitesUnfiltered.filter(message => message !== "Failure")
.slice(0, websitesToAdd);
for (site of websites) {
const url = site.url;
const htmlCode = site.htmlCode;
const website = new Website(url, htmlCode);
this.write(url, htmlCode);
nextQueue = nextQueue.concat(website.urls());
};
nextQueue = new Query(nextQueue.filter(url => !this.seen[url]))
.individualize();
this.queue = nextQueue;
};
};
};
Called like this
const crawler = new Crawler(["https://stanford.edu/"], 25000000);
crawler.crawl();
question from:
https://stackoverflow.com/questions/65617353/push-is-not-a-function-in-web-crawler 与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…