-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
281 lines (233 loc) · 8.72 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
const fetch = require("node-fetch");
const path = require("path");
const fs = require("fs");
const readline = require("readline");
const prompt = require("prompt");
const JSDOM = require("jsdom").JSDOM;
const imgToPDF = require('image-to-pdf')
readline.emitKeypressEvents(process.stdin);
class Parser {
/**
* 解析從 `CNS.getImageUrl` 取得的 XML
* @param {string} xml
* @returns {string}
*/
static checksum(xml) {
const dom = new JSDOM(xml);
const q = selector => dom.window.document.querySelector(selector);
const message = q("Message").textContent;
// 格式:<StatusMsg>,<Checksum>
// 範例:Successful!,599ce71fbcff60a1bee5fa3a0d32dc657b487360
const checksum = message.split(",")[1];
return checksum;
}
/**
* 解析從 CNS.getMetadata 取得的 HTML
* 只會解析第一筆資料
* @param {string} html
*/
static metadata(html) {
const dom = new JSDOM(html);
const q = (selector, root = dom.window.document) => root.querySelector(selector);
const qs = (selector, root = dom.window.document) => [...root.querySelectorAll(selector)];
if (q("table")) {
const titleZh = q(".search_form_line.name li:nth-child(1)").textContent;
const titleEn = q(".search_form_line.name li:nth-child(2)").textContent;
const updateDate = (() => {
// nth-of-type 不支援 class,只能這樣篩選
const parent = qs(".search_form_line").filter((e, i) => i === 1)[0];
const skyblue = qs(".sky_blue", parent).filter((e, i) => i === 1)[0];
const updateDate = skyblue.textContent.replace(/\//g, "-");
return updateDate;
})();
const totalPagesHref = q(".btn_05 a").getAttribute("href");
const totalPages = totalPagesHref.match(/accessPreview\(.+?(\d+)\);/)[1];
return {
titleZh,
titleEn,
updateDate,
totalPages
};
}
return null;
}
/**
* 取得使用者輸入的文字
* @returns {Promise<string>}
*/
static async promptValue() {
console.log(`如要退出程式,請輸入 "exit"`);
const value = await new Promise(resolve => prompt.get([" "], function (err, res) {
console.log();
if (res) {
resolve(res[" "]);
} else {
resolve(null);
}
}));
if (value === "exit") {
process.exit();
}
return value;
}
/**
* 取得使用者輸入的值,並判斷是否為 Y/y
* @returns {Promise<boolean>}
*/
static async promptBoolean() {
console.log(`如要退出程式,請輸入 "exit"`);
const value = await new Promise(resolve => prompt.get([" "], function (err, res) {
console.log();
if (res) {
resolve(res[" "]);
} else {
resolve(null);
}
}));
if (value === "exit") {
process.exit();
}
return value === "Y" || value === "y";
}
}
class CNS {
/**
* 取得圖片路徑
* @param {string} id
* @param {number} page
* @returns {Promise<string>}
*/
static async getImageUrl(id, page) {
const xml = await fetch("https://www.cnsonline.com.tw/preview/GetData", {
"headers": {
"accept": "application/xml, text/xml, */*; q=0.01",
"content-type": "application/x-www-form-urlencoded; charset=UTF-8",
},
"body": `generalno=${id}&version=zh_TW&pageNum=${page}`,
"method": "POST",
}).then(
r => r.text()
);
const checksum = Parser.checksum(xml);
const url = `https://www.cnsonline.com.tw/preview/GenerateImage?generalno=${id}&version=zh_TW&pageNum=${page}&checksum=${checksum}`;
return url;
}
/**
* 搜尋
* @param {string} id
*/
static async getMetadata(id) {
const html = await fetch("https://www.cnsonline.com.tw/", {
"headers": {
"content-type": "application/x-www-form-urlencoded",
"upgrade-insecure-requests": "1"
},
"body": `node=result&typeof=quick&locale=zh_TW&searchfield=${id}`,
"method": "POST",
}).then(
r => r.text()
);
return Parser.metadata(html);
}
/**
* 下載圖片
* @param {string} id
* @param {number} page
* @param {string} path
*/
static async downloadImage(id, page, rootPath) {
const filePath = path.join(rootPath, `${page}.jpg`);
const url = await CNS.getImageUrl(id, page);
const res = await fetch(url);
const fileStream = fs.createWriteStream(filePath);
await new Promise((resolve, reject) => {
res.body.pipe(fileStream);
res.body.on("error", reject);
fileStream.on("finish", resolve);
});
}
}
(async () => {
while (true) {
// ================ [搜尋關鍵字] ================ //
console.log("========================================");
console.log("請輸入要下載的國家標準(CNS)關鍵字(請儘可能精準,若一次找到多筆資料將只會下載第一筆)");
const id = await Parser.promptValue();
if(id === null) {
continue;
}
console.log(`正在搜尋關鍵字 "${id}"...`);
const metadata = await CNS.getMetadata(id);
if (metadata === null) {
console.log(`沒有找到符合的資料。\n\n`);
continue;
}
console.log(`找到資料 (${id}):`);
console.log(` - 中文標題:${metadata.titleZh}`);
console.log(` - 英文標題:${metadata.titleEn}`);
console.log(` - 最後更新日期:${metadata.updateDate}`);
console.log(` - 頁數:${metadata.totalPages}\n`);
console.log("是否下載這筆資料?");
console.log(` Y/y - 下載`);
console.log(` Other - 不要下載,重新搜尋其他關鍵字`);
const confirmDownload = await Parser.promptBoolean();
if (confirmDownload !== true) {
continue;
}
// ================ [確定下載,檢查目錄] ================ //
const root = path.join(__dirname, "downloads");
if (!fs.existsSync(root)) {
fs.mkdirSync(root);
}
const idRoot = path.join(root, id);
if (!fs.existsSync(idRoot)) {
fs.mkdirSync(idRoot);
} else {
console.log(`準備下載 ${id},但發現存在目錄 ${idRoot}`);
console.log(`是否取代既有的資料?`);
console.log(` Y/y - 取代資料,程式將把既有目錄刪除並重新下載`);
console.log(` Other - 保留資料,結束此次下載任務,重新搜尋其他關鍵字`);
const confirmOverwrite = await Parser.promptBoolean();
if (confirmOverwrite) {
fs.rmSync(idRoot, { recursive: true, force: true });
fs.mkdirSync(idRoot);
} else {
continue;
}
}
// ================ [開始下載] ================ //
for (let i = 1; i <= metadata.totalPages; i++) {
process.stdout.write(`\r 下載中${".".repeat(i % 3 + 1)} (${i}/${metadata.totalPages}) `);
await CNS.downloadImage(id, i, idRoot);
}
console.log(`\n下載完成\n`);
// ================ [轉成 PDF 格式] ================ //
console.log(`正在產生 PDF 檔案...`);
const pages = readFilesSync(idRoot);
const filePath = path.join(root, `${id}.pdf`);
imgToPDF(pages, imgToPDF.sizes.A4).pipe(fs.createWriteStream(filePath));
console.log(`已完成\n`);
}
/**
* 取得指定目錄底下所有檔案(以檔名排序,數字模式)
* @param {string} dir
* @returns {Buffer[]}
*/
function readFilesSync(dir) {
const files = [];
fs.readdirSync(dir).forEach(filename => {
const name = path.parse(filename).name;
const filePath = path.resolve(dir, filename);
const stat = fs.statSync(filePath);
const isFile = stat.isFile();
if (isFile) files.push({
filePath,
name
});
});
files.sort((a, b) => {
return a.name.localeCompare(b.name, undefined, { numeric: true, sensitivity: 'base' });
});
return files.map(e => fs.readFileSync(e.filePath));
}
})();