Skip to content

Commit 452528b

Browse files
authored
Add CSV export feature (#217)
* POC: Add CSV export
* Export CSV refactor
  - Wait for file to be downloaded
  - Delete tmp file and folder after
* fix for gRPC mode
* remove unnecessary dep
* return CSV file name
* fix wait function
* fix for Linux
* fix download timeout
* delete tmp folder if a filePath is provided
* undo dev change
* separate render and renderCSV features + fix reusable and clustered mode for csv
* fix RenderCSVRequest proto
* fixes after merge
* add await
* fix function name
1 parent 38a111c commit 452528b

File tree

9 files changed

+378
-43
lines changed

9 files changed

+378
-43
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"@grpc/grpc-js": "^1.0",
2222
"@grpc/proto-loader": "^0.5.4",
2323
"@hapi/boom": "^9.1.0",
24+
"chokidar": "^3.5.1",
2425
"eslint": "^7.13.0",
2526
"express": "^4.16.3",
2627
"express-prom-bundle": "^5.1.5",

proto/rendererv2.proto

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,22 @@ message RenderResponse {
2424
string error = 1;
2525
}
2626

27+
// Request message for the Renderer.RenderCSV RPC.
// NOTE(review): the field names match RenderCSVOptions in src/browser/browser.ts
// (minus `encoding`); the mapping code is not part of this diff — confirm there.
message RenderCSVRequest {
28+
string url = 1;
29+
string filePath = 2;
30+
string renderKey = 3;
31+
string domain = 4;
32+
// NOTE(review): presumably seconds, as browser.ts multiplies its timeout by 1000 — confirm.
int32 timeout = 5;
33+
string timezone = 6;
34+
map<string, StringList> headers = 7;
35+
}
36+
37+
// Response message for the Renderer.RenderCSV RPC: an error string plus the
// name of the exported file.
message RenderCSVResponse {
38+
string error = 1;
39+
string fileName = 2;
40+
}
41+
2742
// Rendering service: one RPC per output kind.
service Renderer {
2843
// Existing render RPC.
rpc Render(RenderRequest) returns (RenderResponse);
44+
// CSV export RPC added by this commit.
rpc RenderCSV(RenderCSVRequest) returns (RenderCSVResponse);
2945
}

src/browser/browser.ts

Lines changed: 127 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import * as os from 'os';
22
import * as uniqueFilename from 'unique-filename';
33
import * as puppeteer from 'puppeteer';
4+
import * as chokidar from 'chokidar';
5+
import * as path from 'path';
6+
import * as fs from 'fs';
47
import { Logger } from '../logger';
58
import { RenderingConfig } from '../config';
69

@@ -23,10 +26,26 @@ export interface RenderOptions {
2326
headers?: HTTPHeaders;
2427
}
2528

29+
/** Options accepted by `renderCSV` / `exportCSV`. */
export interface RenderCSVOptions {
30+
// Page to load; `socket://` URLs are rejected by validateRenderOptions.
url: string;
31+
// When non-empty, exportCSV moves the downloaded file to this path.
filePath: string;
32+
// Seconds; validateRenderOptions parses it with parseInt and falls back to 30.
timeout: string | number;
33+
// Value of the `renderKey` cookie set on the page by preparePage.
renderKey: string;
34+
// Domain of the `renderKey` cookie.
domain: string;
35+
// Passed to page.emulateTimezone by the reusable and clustered browsers.
timezone?: string;
36+
// NOTE(review): not read by any code visible in this diff — confirm usage.
encoding?: string;
37+
// Only `Accept-Language` survives validateRenderOptions; other headers are dropped.
headers?: HTTPHeaders;
38+
}
39+
2640
/** Result of `render`: where the rendered output was written. */
export interface RenderResponse {
2741
// `render` returns `options.filePath` here.
filePath: string;
2842
}
2943

44+
/** Result of `renderCSV` / `exportCSV`. */
export interface RenderCSVResponse {
45+
// Final location of the file: `options.filePath` when given, else the download path.
filePath: string;
46+
// Base name of the file as it was downloaded by the browser.
fileName?: string;
47+
}
48+
3049
export class Browser {
3150
constructor(protected config: RenderingConfig, protected log: Logger) {
3251
this.log.debug('Browser initialized', 'config', this.config);
@@ -48,15 +67,31 @@ export class Browser {
4867

4968
// Empty in this class. NOTE(review): presumably a hook that subclasses override — confirm.
async start(): Promise<void> {}
5069

51-
validateOptions(options: RenderOptions) {
70+
/**
 * Validates and normalizes, in place, the options shared by image and CSV rendering.
 *
 * - Rejects `socket://` URLs, which Puppeteer cannot load.
 * - Replaces `options.headers` with an object holding at most `Accept-Language`,
 *   taken from the request or, failing that, from `config.acceptLanguage`.
 * - Coerces `options.timeout` to a base-10 integer, defaulting to 30.
 *
 * @throws Error when the URL uses the `socket://` scheme.
 */
validateRenderOptions(options: RenderOptions | RenderCSVOptions) {
  const isSocketUrl = options.url.startsWith(`socket://`);
  if (isSocketUrl) {
    // Puppeteer doesn't support socket:// URLs
    throw new Error(`Image rendering in socket mode is not supported`);
  }

  // A request-supplied language wins over the configured default.
  const requested = options.headers || {};
  const acceptLanguage = requested['Accept-Language'] || this.config.acceptLanguage;

  // Every other incoming header is intentionally discarded.
  const forwarded = {};
  if (acceptLanguage) {
    forwarded['Accept-Language'] = acceptLanguage;
  }
  options.headers = forwarded;

  // NaN and 0 both fall back to the 30 default.
  const parsedTimeout = parseInt(options.timeout as string, 10);
  options.timeout = parsedTimeout || 30;
}
89+
90+
validateImageOptions(options: RenderOptions) {
91+
this.validateRenderOptions(options);
92+
5793
options.width = parseInt(options.width as string, 10) || this.config.width;
5894
options.height = parseInt(options.height as string, 10) || this.config.height;
59-
options.timeout = parseInt(options.timeout as string, 10) || 30;
6095

6196
if (options.width < 10) {
6297
options.width = this.config.width;
@@ -79,17 +114,6 @@ export class Browser {
79114
if (options.deviceScaleFactor > this.config.maxDeviceScaleFactor) {
80115
options.deviceScaleFactor = this.config.deviceScaleFactor;
81116
}
82-
83-
options.headers = options.headers || {};
84-
const headers = {};
85-
86-
if (options.headers['Accept-Language']) {
87-
headers['Accept-Language'] = options.headers['Accept-Language'];
88-
} else if (this.config.acceptLanguage) {
89-
headers['Accept-Language'] = this.config.acceptLanguage;
90-
}
91-
92-
options.headers = headers;
93117
}
94118

95119
getLauncherOptions(options) {
@@ -111,12 +135,28 @@ export class Browser {
111135
return launcherOptions;
112136
}
113137

138+
/**
 * Applies per-request state to a page before navigation: sets the `renderKey`
 * cookie for `options.domain` and, when any are present, the extra HTTP headers.
 *
 * @param page Puppeteer page to prepare.
 * @param options Render options providing `renderKey`, `domain` and optional `headers`.
 */
async preparePage(page: any, options: any) {
  const { renderKey, domain } = options;

  if (this.config.verboseLogging) {
    this.log.debug('Setting cookie for page', 'renderKey', renderKey, 'domain', domain);
  }
  await page.setCookie({ name: 'renderKey', value: renderKey, domain: domain });

  const extraHeaders = options.headers;
  const hasExtraHeaders = !!extraHeaders && Object.keys(extraHeaders).length > 0;
  if (!hasExtraHeaders) {
    return;
  }

  this.log.debug(`Setting extra HTTP headers for page`, 'headers', extraHeaders);
  await page.setExtraHTTPHeaders(extraHeaders);
}
153+
114154
async render(options: RenderOptions): Promise<RenderResponse> {
115155
let browser;
116156
let page: any;
117157

118158
try {
119-
this.validateOptions(options);
159+
this.validateImageOptions(options);
120160
const launcherOptions = this.getLauncherOptions(options);
121161
browser = await puppeteer.launch(launcherOptions);
122162
page = await browser.newPage();
@@ -152,19 +192,7 @@ export class Browser {
152192
deviceScaleFactor: options.deviceScaleFactor,
153193
});
154194

155-
if (this.config.verboseLogging) {
156-
this.log.debug('Setting cookie for page', 'renderKey', options.renderKey, 'domain', options.domain);
157-
}
158-
await page.setCookie({
159-
name: 'renderKey',
160-
value: options.renderKey,
161-
domain: options.domain,
162-
});
163-
164-
if (options.headers && Object.keys(options.headers).length > 0) {
165-
this.log.debug(`Setting extra HTTP headers for page`, 'headers', options.headers);
166-
await page.setExtraHTTPHeaders(options.headers);
167-
}
195+
await this.preparePage(page, options);
168196

169197
if (this.config.verboseLogging) {
170198
this.log.debug('Moving mouse on page', 'x', options.width, 'y', options.height);
@@ -202,6 +230,78 @@ export class Browser {
202230
return { filePath: options.filePath };
203231
}
204232

233+
/**
 * Launches a dedicated browser, exports the CSV for `options.url`, and tears
 * the browser down again.
 *
 * @param options CSV render options; validated and normalized in place.
 * @returns Location and original name of the exported file.
 * @throws Error when validation, navigation or the download fails.
 */
async renderCSV(options: RenderCSVOptions): Promise<RenderCSVResponse> {
  let browser;
  let page: any;

  try {
    this.validateRenderOptions(options);
    const launcherOptions = this.getLauncherOptions(options);
    browser = await puppeteer.launch(launcherOptions);
    page = await browser.newPage();
    this.addPageListeners(page);

    return await this.exportCSV(page, options);
  } finally {
    // Nested so that a failing page.close() can no longer skip browser.close()
    // and leave the launched browser process running.
    try {
      if (page) {
        this.removePageListeners(page);
        await page.close();
      }
    } finally {
      if (browser) {
        await browser.close();
      }
    }
  }
}
255+
256+
/**
 * Loads `options.url` in `page` and captures the file the page downloads.
 *
 * Downloads are redirected to a fresh temporary folder that is watched for the
 * first file not ending in `.crdownload` (the browser's in-progress suffix).
 * When `options.filePath` is set, the file is moved there and the temporary
 * folder is removed; otherwise the file stays in the temporary folder and its
 * path is returned.
 *
 * On any failure the watcher is closed and the temporary folder is removed, so
 * a failed export leaves nothing behind.
 *
 * @param page Puppeteer page to drive.
 * @param options Validated CSV options (`url`, `timeout` in seconds, optional `filePath`).
 * @returns Final file path plus the base name of the downloaded file.
 * @throws Error when navigation fails or no file appears within the timeout.
 */
async exportCSV(page: any, options: any): Promise<RenderCSVResponse> {
  await this.preparePage(page, options);

  const downloadPath = uniqueFilename(os.tmpdir());
  fs.mkdirSync(downloadPath);
  const watcher = chokidar.watch(downloadPath);

  // Guarded so the watcher is closed exactly once, on the success or failure path.
  let watcherOpen = true;
  const closeWatcher = async () => {
    if (watcherOpen) {
      watcherOpen = false;
      await watcher.close();
    }
  };

  // Best-effort removal of the temporary folder and anything left inside it.
  const removeDownloadFolder = () => {
    try {
      for (const entry of fs.readdirSync(downloadPath)) {
        fs.unlinkSync(path.join(downloadPath, entry));
      }
      fs.rmdirSync(downloadPath);
    } catch (err) {
      this.log.debug('Failed to remove temporary download folder', 'path', downloadPath, 'error', err);
    }
  };

  let downloadFilePath = '';
  let finished = false;

  try {
    watcher.on('add', file => {
      // Ignore the partial file; only the completed download counts.
      if (!file.endsWith('.crdownload')) {
        downloadFilePath = file;
      }
    });

    await page._client.send('Page.setDownloadBehavior', { behavior: 'allow', downloadPath: downloadPath });

    if (this.config.verboseLogging) {
      this.log.debug('Navigating and waiting for all network requests to finish', 'url', options.url);
    }

    await page.goto(options.url, { waitUntil: 'networkidle0', timeout: options.timeout * 1000 });

    if (this.config.verboseLogging) {
      this.log.debug('Waiting for download to end');
    }

    // Poll every 500 ms until the watcher reports a finished file or time runs out.
    const startDate = Date.now();
    while (downloadFilePath === '' && Date.now() - startDate <= options.timeout * 1000) {
      await new Promise(resolve => setTimeout(resolve, 500));
    }

    if (downloadFilePath === '') {
      throw new Error(`Timeout exceeded while waiting for download to end`);
    }

    // Stop watching before the folder is modified or removed.
    await closeWatcher();

    let filePath = downloadFilePath;
    if (options.filePath) {
      try {
        fs.renameSync(downloadFilePath, options.filePath);
      } catch (err) {
        // rename cannot cross filesystems; fall back to copy + delete.
        if ((err as NodeJS.ErrnoException).code !== 'EXDEV') {
          throw err;
        }
        fs.copyFileSync(downloadFilePath, options.filePath);
        fs.unlinkSync(downloadFilePath);
      }
      filePath = options.filePath;
      fs.rmdirSync(path.dirname(downloadFilePath));
    }

    finished = true;
    return { filePath, fileName: path.basename(downloadFilePath) };
  } finally {
    if (!finished) {
      try {
        await closeWatcher();
      } catch (err) {
        this.log.debug('Failed to close download watcher', 'error', err);
      }
      removeDownloadFolder();
    }
  }
}
304+
205305
addPageListeners(page: any) {
206306
page.on('error', this.logError);
207307
page.on('pageerror', this.logPageError);

src/browser/clustered.ts

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,22 @@
11
import { Cluster } from 'puppeteer-cluster';
2-
import { Browser, RenderResponse, RenderOptions } from './browser';
2+
import { Browser, RenderResponse, RenderOptions, RenderCSVOptions, RenderCSVResponse } from './browser';
33
import { Logger } from '../logger';
44
import { RenderingConfig, ClusteringConfig } from '../config';
55

6+
/** Kind of job submitted to the cluster; selects exportCSV or takeScreenshot in the task handler. */
enum RenderType {
7+
CSV = 'csv',
8+
PNG = 'png',
9+
}
10+
11+
/** Job payload handed to `cluster.execute`: the render options plus the kind of render. */
interface ClusterOptions {
12+
options: RenderOptions | RenderCSVOptions;
13+
renderType: RenderType;
14+
}
15+
16+
// Result type of a cluster job: either an image or a CSV response.
type ClusterResponse = RenderResponse | RenderCSVResponse;
17+
618
export class ClusteredBrowser extends Browser {
7-
cluster: Cluster<any, RenderResponse>;
19+
cluster: Cluster<ClusterOptions, ClusterResponse>;
820
clusteringConfig: ClusteringConfig;
921
concurrency: number;
1022

@@ -27,22 +39,33 @@ export class ClusteredBrowser extends Browser {
2739
puppeteerOptions: launcherOptions,
2840
});
2941
await this.cluster.task(async ({ page, data }) => {
30-
if (data.timezone) {
42+
if (data.options.timezone) {
3143
// set timezone
32-
await page.emulateTimezone(data.timezone);
44+
await page.emulateTimezone(data.options.timezone);
3345
}
3446

3547
try {
3648
this.addPageListeners(page);
37-
return await this.takeScreenshot(page, data);
49+
switch (data.renderType) {
50+
case RenderType.CSV:
51+
return await this.exportCSV(page, data.options);
52+
case RenderType.PNG:
53+
default:
54+
return await this.takeScreenshot(page, data.options);
55+
}
3856
} finally {
3957
this.removePageListeners(page);
4058
}
4159
});
4260
}
4361

4462
async render(options: RenderOptions): Promise<RenderResponse> {
45-
this.validateOptions(options);
46-
return await this.cluster.execute(options);
63+
this.validateImageOptions(options);
64+
return this.cluster.execute({ options, renderType: RenderType.PNG });
65+
}
66+
67+
/**
 * Validates the options and queues a CSV export job on the cluster.
 *
 * @param options CSV render options; normalized in place by validateRenderOptions.
 * @returns The result produced by the cluster task for this job.
 */
async renderCSV(options: RenderCSVOptions): Promise<RenderCSVResponse> {
  this.validateRenderOptions(options);

  const job = { options, renderType: RenderType.CSV };
  return this.cluster.execute(job);
}
4871
}

src/browser/reusable.ts

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import * as puppeteer from 'puppeteer';
2-
import { Browser, RenderResponse, RenderOptions } from './browser';
2+
import { Browser, RenderResponse, RenderOptions, RenderCSVResponse, RenderCSVOptions } from './browser';
33
import { Logger } from '../logger';
44
import { RenderingConfig } from '../config';
55

@@ -20,7 +20,7 @@ export class ReusableBrowser extends Browser {
2020
let page: puppeteer.Page | undefined;
2121

2222
try {
23-
this.validateOptions(options);
23+
this.validateImageOptions(options);
2424
context = await this.browser.createIncognitoBrowserContext();
2525
page = await context.newPage();
2626

@@ -42,4 +42,32 @@ export class ReusableBrowser extends Browser {
4242
}
4343
}
4444
}
45+
46+
/**
 * Exports a CSV using the shared browser, in a throwaway incognito context.
 *
 * @param options CSV render options; validated and normalized in place.
 * @returns Location and original name of the exported file.
 * @throws Error when validation, navigation or the download fails.
 */
async renderCSV(options: RenderCSVOptions): Promise<RenderCSVResponse> {
  let context: puppeteer.BrowserContext | undefined;
  let page: puppeteer.Page | undefined;

  try {
    this.validateRenderOptions(options);
    context = await this.browser.createIncognitoBrowserContext();
    page = await context.newPage();

    if (options.timezone) {
      // set timezone
      await page.emulateTimezone(options.timezone);
    }

    this.addPageListeners(page);

    return await this.exportCSV(page, options);
  } finally {
    // Nested so that a failing page.close() can no longer skip context.close()
    // and leave the incognito context open in the long-lived shared browser.
    try {
      if (page) {
        this.removePageListeners(page);
        await page.close();
      }
    } finally {
      if (context) {
        await context.close();
      }
    }
  }
}
4573
}

0 commit comments

Comments
 (0)