import { injectable } from 'inversify';
import { CookieJar } from 'jsdom';
import nodeFetch, { Headers, RequestInit, Response } from 'node-fetch';
import { inject } from '../../core/inject';
import { CookieSaveError } from '../error/cookie-save-error';
import { IStore } from '../store/i-store';
import { IWebCrawler } from './i-web-crawler';

// NOTE(review): `StoreKey` is referenced below but no import for it is visible
// in this file. Presumably it is declared ambiently or its import was lost;
// confirm and add the import if needed.

/**
 * HTTP client that keeps a cookie jar across requests and persists that jar
 * through the injected store.
 *
 * On first use the jar is restored from `StoreKey.COOKIES`; after every
 * response carrying `Set-Cookie` headers the jar is updated and saved back.
 */
@injectable()
export class WebCrawler implements IWebCrawler {
  /** Cookies sent with, and collected from, every request made by `fetch`. */
  public cookieJar: CookieJar;

  /** True once the persisted cookies have been loaded (or found absent). */
  private initialized: boolean;

  /** In-flight restore of the jar, shared by concurrent callers of `init`. */
  private loading: Promise<void> | undefined;

  private readonly store: IStore;

  public constructor(@inject('store') store: IStore) {
    this.initialized = false;
    this.loading = undefined;
    this.cookieJar = new CookieJar();
    this.store = store;
  }

  /**
   * Performs a request with the jar's cookies attached and records any cookies
   * set by the response.
   *
   * @param url - Address to request.
   * @param requestInit - node-fetch options; its headers are preserved and a
   *   `Cookie` header is added when the jar holds cookies for `url`.
   * @returns The node-fetch response, once its cookies have been stored.
   * @throws CookieSaveError when persisting the updated jar fails.
   */
  public async fetch(url: string, requestInit: RequestInit = {}): Promise<Response> {
    await this.init();

    // `Headers` accepts every HeadersInit shape (plain object, pair array or
    // Headers instance); object spread would silently drop the latter two.
    const headers = new Headers(requestInit.headers);
    const cookie = this.cookieJar.getCookieStringSync(url);
    if (cookie !== '') {
      // Only override when the jar has something to send, so an empty
      // `Cookie:` header never clobbers a caller-supplied one.
      headers.set('Cookie', cookie);
    }

    const res = await nodeFetch(url, { ...requestInit, headers });

    // After redirects the Set-Cookie headers belong to the final URL. Fall
    // back to the request URL when the response does not report one.
    // Awaited so that a failed save rejects this call instead of surfacing as
    // an unhandled rejection.
    await this.setCookies(res.headers.raw()['set-cookie'], res.url || url);
    return res;
  }

  /**
   * Restores the cookie jar from the store the first time it is needed.
   * Concurrent callers share one load; a failed load is retried on the next
   * call.
   */
  private init(): Promise<void> {
    if (this.initialized) {
      return Promise.resolve();
    }
    if (this.loading === undefined) {
      this.loading = this.loadCookies().finally(() => {
        this.loading = undefined;
      });
    }
    return this.loading;
  }

  /** Loads the persisted jar, if any, and marks the crawler as initialized. */
  private async loadCookies(): Promise<void> {
    const cookies: unknown = await this.store.load(StoreKey.COOKIES);
    if (cookies !== undefined && cookies !== null) {
      // NOTE(review): the value saved is whatever `serializeSync()` returns,
      // so this may be an object rather than a string. Confirm against the
      // store implementation.
      this.cookieJar = CookieJar.deserializeSync(cookies as string);
    }
    this.initialized = true;
  }

  /**
   * Adds the given `Set-Cookie` header values to the jar and persists the jar.
   *
   * @param header - Raw `Set-Cookie` values; `undefined` when the response set
   *   no cookies, in which case nothing is stored.
   * @param url - URL the cookies were received from.
   * @throws CookieSaveError when the store rejects the save.
   */
  private async setCookies(header: readonly string[] | undefined, url: string): Promise<void> {
    if (!header || header.length === 0) {
      return;
    }

    for (const cookie of header) {
      this.cookieJar.setCookieSync(cookie, url);
    }

    try {
      await this.store.save(StoreKey.COOKIES, this.cookieJar.serializeSync());
    } catch (reason) {
      throw new CookieSaveError(reason instanceof Error ? reason.message : String(reason));
    }
  }
}