2020-07-25 02:02:37 +02:00
|
|
|
import { inject, injectable } from 'inversify';
|
2020-03-02 23:15:44 +01:00
|
|
|
import { CookieJar } from 'jsdom';
|
|
|
|
import nodeFetch, { RequestInit, Response } from 'node-fetch';
|
2020-07-25 02:02:37 +02:00
|
|
|
import { CookieSaveError } from '../error/cookie-save-error';
|
|
|
|
import { IStore } from '../store/i-store';
|
2020-04-10 05:02:19 +02:00
|
|
|
import { IWebCrawler } from './i-web-crawler';
|
2020-02-08 23:26:57 +01:00
|
|
|
|
|
|
|
/**
 * HTTP client that attaches cookies from a `CookieJar` to every request and
 * stores the jar through the injected `IStore` after each response that
 * carries `Set-Cookie` headers.
 *
 * The jar is loaded lazily from the store on the first call to `fetch`.
 *
 * NOTE(review): `StoreKey` is referenced below but no import for it is visible
 * in this part of the file — confirm it is imported.
 */
@injectable()
export class WebCrawler implements IWebCrawler {
  /** Cookie jar used for outgoing requests; replaced by the stored jar once loaded. */
  public cookieJar: CookieJar;

  /** True once the stored cookies have been loaded (or found to be absent). */
  private initialized: boolean;

  /** In-flight (or completed) load of the stored cookies, shared by overlapping calls. */
  private initPromise: Promise<void> | undefined;

  private store: IStore;

  /**
   * @param store Storage backend used to load and save the serialized cookie jar.
   */
  public constructor(@inject(Symbol.for('store')) store: IStore) {
    this.initialized = false;
    this.initPromise = undefined;
    this.cookieJar = new CookieJar();
    this.store = store;
  }

  /**
   * Performs an HTTP request with the jar's cookies for `url` set in the
   * `Cookie` header, then records any cookies returned by the server.
   *
   * The returned promise resolves only after the received cookies have been
   * handed to the store, so a failure to save is reported to the caller
   * instead of being lost in a detached promise.
   *
   * @param url Absolute URL to request.
   * @param requestInit Options forwarded to `node-fetch`; its `Cookie` header is overwritten.
   * @returns The response from `node-fetch`.
   * @throws CookieSaveError (as a rejection) when saving the cookie jar fails.
   */
  public fetch(url: string, requestInit: RequestInit = {}): Promise<Response> {
    return this.init().then(() => {
      const cookiedInit = {
        ...requestInit,
        headers: {
          ...requestInit.headers,
          Cookie: this.cookieJar.getCookieStringSync(url),
        },
      };

      return nodeFetch(url, cookiedInit).then((res: Response) =>
        // Chain on the save so its rejection propagates to the caller rather
        // than surfacing as an unhandled rejection.
        this.setCookies(res.headers.raw()['set-cookie'], url).then(() => res),
      );
    });
  }

  /**
   * Loads the stored cookie jar once.
   *
   * Overlapping calls share a single load so that a late-finishing duplicate
   * load cannot replace a jar that has already received new cookies. A failed
   * load is forgotten so that a later call can try again.
   */
  private init(): Promise<void> {
    if (this.initialized) {
      return Promise.resolve();
    }

    if (this.initPromise === undefined) {
      this.initPromise = this.store
        .load(StoreKey.COOKIES)
        .then((cookies: unknown) => {
          if (cookies !== undefined) {
            this.cookieJar = CookieJar.deserializeSync(cookies as string);
          }
          this.initialized = true;
        })
        .catch((reason: unknown) => {
          // Drop the failed attempt so the next call retries the load.
          this.initPromise = undefined;
          throw reason;
        });
    }

    return this.initPromise;
  }

  /**
   * Adds the given `Set-Cookie` header values to the jar and saves the jar.
   *
   * @param header Raw `Set-Cookie` values, or `undefined` when the response had none.
   * @param url URL the cookies were received from.
   * @throws CookieSaveError (as a rejection) when the store fails to save the jar.
   */
  private setCookies(header: string[] | undefined, url: string): Promise<void> {
    if (!header) {
      // Nothing was received, so there is nothing to record or save.
      return Promise.resolve();
    }

    for (const cookie of header) {
      this.cookieJar.setCookieSync(cookie, url);
    }

    return this.store
      .save(StoreKey.COOKIES, this.cookieJar.serializeSync())
      .catch((reason: unknown) => {
        throw new CookieSaveError(reason instanceof Error ? reason.message : String(reason));
      });
  }
}
|