NodeJs Request Error (ETIMEDOUT) while scraping a webpage
I sometimes get an ETIMEDOUT error while scraping a webpage in Node.js (using localhost as the server). I am using Request-Promise to make the requests. I couldn't find a way to handle it; it does not happen every time, it appears randomly. I tried decreasing the concurrency, but that didn't help. After all, I'm not doing anything heavy. Any suggestions (using another library, etc.)?
Here is the error code:
{ RequestError: Error: connect ETIMEDOUT 52.232.0.90:443
at new RequestError (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-coreliberrors.js:14:15)
at Request.plumbing.callback (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-corelibplumbing.js:87:29)
at Request.RP$callback [as _callback] (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-corelibplumbing.js:46:31)
at self.callback (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequestrequest.js:185:22)
at emitOne (events.js:116:13)
at Request.emit (events.js:211:7)
at Request.onRequestError (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequestrequest.js:881:8)
at emitOne (events.js:116:13)
at ClientRequest.emit (events.js:211:7)
at TLSSocket.socketErrorListener (_http_client.js:387:9)
at emitOne (events.js:116:13)
at TLSSocket.emit (events.js:211:7)
at emitErrorNT (internal/streams/destroy.js:66:8)
at _combinedTickCallback (internal/process/next_tick.js:139:11)
at process._tickCallback (internal/process/next_tick.js:181:9)
name: 'RequestError',
message: 'Error: connect ETIMEDOUT 52.232.0.90:443',
cause:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
error:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
options:
{ uri: 'https://www.ntv.com.tr/ekonomi/hazine-ve-maliye-bakani-albayrakenflasyondaki-asagiya-dogru-trendi-cok-daha-guc,400oDcsHMUq1nXl6-52C9w',
callback: [Function: RP$callback],
transform: undefined,
simple: true,
resolveWithFullResponse: false,
transform2xxOnly: false },
response: undefined }
The code for scraping:
const rp = require('request-promise');
const cheerio = require('cheerio');
const fs = require("fs");
const Promise = require("bluebird");
const moment = require('moment');
/**
 * Scrapes the ntv.com.tr front page and the article pages it links to.
 *
 * Loads the home page, takes the first `<a>` child of every
 * `.swiper-slide.color-white` element, fetches each linked page (at most 10
 * requests in flight, via bluebird's `Promise.map`) and extracts the title,
 * the `og:url` and `og:image` meta values and the `<time datetime>` value.
 * The collected posts are written to `./ntv.json` and returned.
 *
 * Request and parsing errors are logged rather than rethrown: a failed
 * article is left out of the result, and a failed home-page request yields
 * an empty array.
 *
 * @returns {Promise<Array<Object>>} posts with `title`, `newsUrl`,
 *   `imageUrl` and `time` (the value of `moment(...).valueOf()`).
 */
async function Gundem () {
  let posts = [];
  try {
    const baseUrl = 'https://www.ntv.com.tr';
    const mainHtml = await rp(baseUrl);
    const $ = cheerio.load(mainHtml);
    const links = $(".swiper-slide.color-white").map((i, el) => {
      return baseUrl + $(el).children("a").first().attr("href");
    }).get();
    const scraped = await Promise.map(links, async (link) => {
      try {
        const newsHtml = await rp(link);
        const $page = cheerio.load(newsHtml);
        return {
          title: getTitle($page),
          newsUrl: $page("meta[property='og:url']").attr("content"),
          imageUrl: $page("meta[property='og:image']").attr("content"),
          time: moment($page("time").attr("datetime")).valueOf()
        };
      } catch (e) {
        // One bad article (e.g. ETIMEDOUT) must not abort the whole batch.
        console.log('error scraping ' + link + '\n', e);
        return undefined;
      }
    }, {concurrency: 10});
    // Drop the holes left by failed articles so they are not serialized as
    // `null` entries in the JSON file.
    posts = scraped.filter((post) => post !== undefined);
  } catch (e) {
    console.log('error scraping ntv' + '\n', e);
  }
  // Fire-and-forget write: the function returns without waiting for it.
  fs.writeFile('./ntv.json', JSON.stringify(posts, null, 3), (err) => {
    if (err) throw err;
  });
  return posts;
}
node.js promise request
add a comment |
I sometimes get an ETIMEDOUT error while scraping a webpage in Node.js (using localhost as the server). I am using Request-Promise to make the requests. I couldn't find a way to handle it; it does not happen every time, it appears randomly. I tried decreasing the concurrency, but that didn't help. After all, I'm not doing anything heavy. Any suggestions (using another library, etc.)?
Here is the error code:
{ RequestError: Error: connect ETIMEDOUT 52.232.0.90:443
at new RequestError (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-coreliberrors.js:14:15)
at Request.plumbing.callback (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-corelibplumbing.js:87:29)
at Request.RP$callback [as _callback] (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-corelibplumbing.js:46:31)
at self.callback (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequestrequest.js:185:22)
at emitOne (events.js:116:13)
at Request.emit (events.js:211:7)
at Request.onRequestError (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequestrequest.js:881:8)
at emitOne (events.js:116:13)
at ClientRequest.emit (events.js:211:7)
at TLSSocket.socketErrorListener (_http_client.js:387:9)
at emitOne (events.js:116:13)
at TLSSocket.emit (events.js:211:7)
at emitErrorNT (internal/streams/destroy.js:66:8)
at _combinedTickCallback (internal/process/next_tick.js:139:11)
at process._tickCallback (internal/process/next_tick.js:181:9)
name: 'RequestError',
message: 'Error: connect ETIMEDOUT 52.232.0.90:443',
cause:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
error:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
options:
{ uri: 'https://www.ntv.com.tr/ekonomi/hazine-ve-maliye-bakani-albayrakenflasyondaki-asagiya-dogru-trendi-cok-daha-guc,400oDcsHMUq1nXl6-52C9w',
callback: [Function: RP$callback],
transform: undefined,
simple: true,
resolveWithFullResponse: false,
transform2xxOnly: false },
response: undefined }
The code for scraping:
const rp = require('request-promise');
const cheerio = require('cheerio');
const fs = require("fs");
const Promise = require("bluebird");
const moment = require('moment');
/**
 * Scrapes the ntv.com.tr front page and the article pages it links to.
 *
 * Loads the home page, takes the first `<a>` child of every
 * `.swiper-slide.color-white` element, fetches each linked page (at most 10
 * requests in flight, via bluebird's `Promise.map`) and extracts the title,
 * the `og:url` and `og:image` meta values and the `<time datetime>` value.
 * The collected posts are written to `./ntv.json` and returned.
 *
 * Request and parsing errors are logged rather than rethrown: a failed
 * article is left out of the result, and a failed home-page request yields
 * an empty array.
 *
 * @returns {Promise<Array<Object>>} posts with `title`, `newsUrl`,
 *   `imageUrl` and `time` (the value of `moment(...).valueOf()`).
 */
async function Gundem () {
  let posts = [];
  try {
    const baseUrl = 'https://www.ntv.com.tr';
    const mainHtml = await rp(baseUrl);
    const $ = cheerio.load(mainHtml);
    const links = $(".swiper-slide.color-white").map((i, el) => {
      return baseUrl + $(el).children("a").first().attr("href");
    }).get();
    const scraped = await Promise.map(links, async (link) => {
      try {
        const newsHtml = await rp(link);
        const $page = cheerio.load(newsHtml);
        return {
          title: getTitle($page),
          newsUrl: $page("meta[property='og:url']").attr("content"),
          imageUrl: $page("meta[property='og:image']").attr("content"),
          time: moment($page("time").attr("datetime")).valueOf()
        };
      } catch (e) {
        // One bad article (e.g. ETIMEDOUT) must not abort the whole batch.
        console.log('error scraping ' + link + '\n', e);
        return undefined;
      }
    }, {concurrency: 10});
    // Drop the holes left by failed articles so they are not serialized as
    // `null` entries in the JSON file.
    posts = scraped.filter((post) => post !== undefined);
  } catch (e) {
    console.log('error scraping ntv' + '\n', e);
  }
  // Fire-and-forget write: the function returns without waiting for it.
  fs.writeFile('./ntv.json', JSON.stringify(posts, null, 3), (err) => {
    if (err) throw err;
  });
  return posts;
}
node.js promise request
add a comment |
I sometimes get an ETIMEDOUT error while scraping a webpage in Node.js (using localhost as the server). I am using Request-Promise to make the requests. I couldn't find a way to handle it; it does not happen every time, it appears randomly. I tried decreasing the concurrency, but that didn't help. After all, I'm not doing anything heavy. Any suggestions (using another library, etc.)?
Here is the error code:
{ RequestError: Error: connect ETIMEDOUT 52.232.0.90:443
at new RequestError (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-coreliberrors.js:14:15)
at Request.plumbing.callback (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-corelibplumbing.js:87:29)
at Request.RP$callback [as _callback] (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-corelibplumbing.js:46:31)
at self.callback (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequestrequest.js:185:22)
at emitOne (events.js:116:13)
at Request.emit (events.js:211:7)
at Request.onRequestError (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequestrequest.js:881:8)
at emitOne (events.js:116:13)
at ClientRequest.emit (events.js:211:7)
at TLSSocket.socketErrorListener (_http_client.js:387:9)
at emitOne (events.js:116:13)
at TLSSocket.emit (events.js:211:7)
at emitErrorNT (internal/streams/destroy.js:66:8)
at _combinedTickCallback (internal/process/next_tick.js:139:11)
at process._tickCallback (internal/process/next_tick.js:181:9)
name: 'RequestError',
message: 'Error: connect ETIMEDOUT 52.232.0.90:443',
cause:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
error:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
options:
{ uri: 'https://www.ntv.com.tr/ekonomi/hazine-ve-maliye-bakani-albayrakenflasyondaki-asagiya-dogru-trendi-cok-daha-guc,400oDcsHMUq1nXl6-52C9w',
callback: [Function: RP$callback],
transform: undefined,
simple: true,
resolveWithFullResponse: false,
transform2xxOnly: false },
response: undefined }
The code for scraping:
const rp = require('request-promise');
const cheerio = require('cheerio');
const fs = require("fs");
const Promise = require("bluebird");
const moment = require('moment');
/**
 * Scrapes the ntv.com.tr front page and the article pages it links to.
 *
 * Loads the home page, takes the first `<a>` child of every
 * `.swiper-slide.color-white` element, fetches each linked page (at most 10
 * requests in flight, via bluebird's `Promise.map`) and extracts the title,
 * the `og:url` and `og:image` meta values and the `<time datetime>` value.
 * The collected posts are written to `./ntv.json` and returned.
 *
 * Request and parsing errors are logged rather than rethrown: a failed
 * article is left out of the result, and a failed home-page request yields
 * an empty array.
 *
 * @returns {Promise<Array<Object>>} posts with `title`, `newsUrl`,
 *   `imageUrl` and `time` (the value of `moment(...).valueOf()`).
 */
async function Gundem () {
  let posts = [];
  try {
    const baseUrl = 'https://www.ntv.com.tr';
    const mainHtml = await rp(baseUrl);
    const $ = cheerio.load(mainHtml);
    const links = $(".swiper-slide.color-white").map((i, el) => {
      return baseUrl + $(el).children("a").first().attr("href");
    }).get();
    const scraped = await Promise.map(links, async (link) => {
      try {
        const newsHtml = await rp(link);
        const $page = cheerio.load(newsHtml);
        return {
          title: getTitle($page),
          newsUrl: $page("meta[property='og:url']").attr("content"),
          imageUrl: $page("meta[property='og:image']").attr("content"),
          time: moment($page("time").attr("datetime")).valueOf()
        };
      } catch (e) {
        // One bad article (e.g. ETIMEDOUT) must not abort the whole batch.
        console.log('error scraping ' + link + '\n', e);
        return undefined;
      }
    }, {concurrency: 10});
    // Drop the holes left by failed articles so they are not serialized as
    // `null` entries in the JSON file.
    posts = scraped.filter((post) => post !== undefined);
  } catch (e) {
    console.log('error scraping ntv' + '\n', e);
  }
  // Fire-and-forget write: the function returns without waiting for it.
  fs.writeFile('./ntv.json', JSON.stringify(posts, null, 3), (err) => {
    if (err) throw err;
  });
  return posts;
}
node.js promise request
I sometimes get an ETIMEDOUT error while scraping a webpage in Node.js (using localhost as the server). I am using Request-Promise to make the requests. I couldn't find a way to handle it; it does not happen every time, it appears randomly. I tried decreasing the concurrency, but that didn't help. After all, I'm not doing anything heavy. Any suggestions (using another library, etc.)?
Here is the error code:
{ RequestError: Error: connect ETIMEDOUT 52.232.0.90:443
at new RequestError (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-coreliberrors.js:14:15)
at Request.plumbing.callback (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-corelibplumbing.js:87:29)
at Request.RP$callback [as _callback] (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequest-promise-corelibplumbing.js:46:31)
at self.callback (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequestrequest.js:185:22)
at emitOne (events.js:116:13)
at Request.emit (events.js:211:7)
at Request.onRequestError (C:UsersHHSNodeJs
ProjectsNews_Scrapernode_modulesrequestrequest.js:881:8)
at emitOne (events.js:116:13)
at ClientRequest.emit (events.js:211:7)
at TLSSocket.socketErrorListener (_http_client.js:387:9)
at emitOne (events.js:116:13)
at TLSSocket.emit (events.js:211:7)
at emitErrorNT (internal/streams/destroy.js:66:8)
at _combinedTickCallback (internal/process/next_tick.js:139:11)
at process._tickCallback (internal/process/next_tick.js:181:9)
name: 'RequestError',
message: 'Error: connect ETIMEDOUT 52.232.0.90:443',
cause:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
error:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
options:
{ uri: 'https://www.ntv.com.tr/ekonomi/hazine-ve-maliye-bakani-albayrakenflasyondaki-asagiya-dogru-trendi-cok-daha-guc,400oDcsHMUq1nXl6-52C9w',
callback: [Function: RP$callback],
transform: undefined,
simple: true,
resolveWithFullResponse: false,
transform2xxOnly: false },
response: undefined }
The code for scraping:
const rp = require('request-promise');
const cheerio = require('cheerio');
const fs = require("fs");
const Promise = require("bluebird");
const moment = require('moment');
/**
 * Scrapes the ntv.com.tr front page and the article pages it links to.
 *
 * Loads the home page, takes the first `<a>` child of every
 * `.swiper-slide.color-white` element, fetches each linked page (at most 10
 * requests in flight, via bluebird's `Promise.map`) and extracts the title,
 * the `og:url` and `og:image` meta values and the `<time datetime>` value.
 * The collected posts are written to `./ntv.json` and returned.
 *
 * Request and parsing errors are logged rather than rethrown: a failed
 * article is left out of the result, and a failed home-page request yields
 * an empty array.
 *
 * @returns {Promise<Array<Object>>} posts with `title`, `newsUrl`,
 *   `imageUrl` and `time` (the value of `moment(...).valueOf()`).
 */
async function Gundem () {
  let posts = [];
  try {
    const baseUrl = 'https://www.ntv.com.tr';
    const mainHtml = await rp(baseUrl);
    const $ = cheerio.load(mainHtml);
    const links = $(".swiper-slide.color-white").map((i, el) => {
      return baseUrl + $(el).children("a").first().attr("href");
    }).get();
    const scraped = await Promise.map(links, async (link) => {
      try {
        const newsHtml = await rp(link);
        const $page = cheerio.load(newsHtml);
        return {
          title: getTitle($page),
          newsUrl: $page("meta[property='og:url']").attr("content"),
          imageUrl: $page("meta[property='og:image']").attr("content"),
          time: moment($page("time").attr("datetime")).valueOf()
        };
      } catch (e) {
        // One bad article (e.g. ETIMEDOUT) must not abort the whole batch.
        console.log('error scraping ' + link + '\n', e);
        return undefined;
      }
    }, {concurrency: 10});
    // Drop the holes left by failed articles so they are not serialized as
    // `null` entries in the JSON file.
    posts = scraped.filter((post) => post !== undefined);
  } catch (e) {
    console.log('error scraping ntv' + '\n', e);
  }
  // Fire-and-forget write: the function returns without waiting for it.
  fs.writeFile('./ntv.json', JSON.stringify(posts, null, 3), (err) => {
    if (err) throw err;
  });
  return posts;
}
node.js promise request
node.js promise request
edited Nov 22 '18 at 21:11
Huseyin Sahin
asked Nov 22 '18 at 21:02
Huseyin SahinHuseyin Sahin
5218
5218
add a comment |
add a comment |
0
active
oldest
votes
Your Answer
// Registered under the "editor" / "code-snippets" keys: requests the
// "externalEditor" and then the "snippets" module, and calls
// StackExchange.snippets.init() in the innermost callback.
StackExchange.ifUsing(
  "editor",
  () => {
    StackExchange.using("externalEditor", () => {
      StackExchange.using("snippets", () => {
        StackExchange.snippets.init();
      });
    });
  },
  "code-snippets"
);
// Ready callback for the answer form: initialises the tag renderer with
// empty tag lists, then creates the editor once the "externalEditor" module
// is available.
StackExchange.ready(function() {
  var channelOptions = {
    tags: "".split(" "),
    id: "1"
  };
  initTagRenderer("".split(" "), "".split(" "), channelOptions);

  StackExchange.using("externalEditor", function() {
    // Have to fire editor after snippets, if snippets enabled
    if (StackExchange.settings.snippets.snippetsEnabled) {
      StackExchange.using("snippets", function() {
        createEditor();
      });
    }
    else {
      createEditor();
    }
  });

  // Passes the answer-editor configuration to StackExchange.prepareEditor.
  // Declared after its call sites; function hoisting makes that valid.
  function createEditor() {
    StackExchange.prepareEditor({
      heartbeatType: 'answer',
      autoActivateHeartbeat: false,
      convertImagesToLinks: true,
      noModals: true,
      showLowRepImageUploadWarning: true,
      reputationToPostImages: 10,
      bindNavPrevention: true,
      postfix: "",
      imageUploader: {
        // The \u003c / \u003e escapes encode "<" and ">"; inner quotes must be
        // escaped so each HTML fragment stays a single string literal.
        brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
        contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
        allowUrls: true
      },
      onDemand: true,
      discardSelector: ".discard-answer"
      ,immediatelyShowMarkdownHelp:true
    });
  }
});
Sign up or log in
// Ready callback: passes the '#login-link' selector to onClickDraftSave.
StackExchange.ready(() => {
  StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
// Ready callback: calls openid.initPostLogin with the '.new-post-login'
// selector, the URL-encoded address of this question's "new answer" anchor,
// and the 'question_page' label.
StackExchange.ready(() => {
  StackExchange.openid.initPostLogin(
    '.new-post-login',
    'https%3a%2f%2fstackoverflow.com%2fquestions%2f53437941%2fnodejs-request-error-etimedout-while-scraping-a-webpage%23new-answer',
    'question_page'
  );
});
Post as a guest
Required, but never shown
0
active
oldest
votes
0
active
oldest
votes
active
oldest
votes
active
oldest
votes
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
// Ready callback: passes the '#login-link' selector to onClickDraftSave.
StackExchange.ready(() => {
  StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
// Ready callback: calls openid.initPostLogin with the '.new-post-login'
// selector, the URL-encoded address of this question's "new answer" anchor,
// and the 'question_page' label.
StackExchange.ready(() => {
  StackExchange.openid.initPostLogin(
    '.new-post-login',
    'https%3a%2f%2fstackoverflow.com%2fquestions%2f53437941%2fnodejs-request-error-etimedout-while-scraping-a-webpage%23new-answer',
    'question_page'
  );
});
Post as a guest
Required, but never shown
Sign up or log in
// Ready callback: passes the '#login-link' selector to onClickDraftSave.
StackExchange.ready(() => {
  StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
// Ready callback: passes the '#login-link' selector to onClickDraftSave.
StackExchange.ready(() => {
  StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
// Ready callback: passes the '#login-link' selector to onClickDraftSave.
StackExchange.ready(() => {
  StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown