Skip to content

Commit

Permalink
step
Browse files Browse the repository at this point in the history
  • Loading branch information
Miniast committed Apr 30, 2024
1 parent 6fc0409 commit 8276305
Show file tree
Hide file tree
Showing 7 changed files with 315 additions and 61 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
node_modules
.vscode
archive
pnpm-lock.yaml
dist
18 changes: 16 additions & 2 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 18 additions & 12 deletions src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import { isFunction, setDefaults, flattenDeep } from "./lib/utils.js";
import { getValidOptions, alignOptions } from "./options.js";
import type { crawlerOptions, requestOptions } from "./types/crawler.js";
import { promisify } from "util";
import { load } from "cheerio";
import got from "got";
import seenreq from "seenreq";
import iconv from "iconv-lite";
import cheerio from "cheerio";

//@todo change log method
process.env.NODE_ENV = process.env.NODE_ENV ?? process.argv[2] ?? "debug";

if (process.env.NODE_ENV !== "debug") {
Expand Down Expand Up @@ -89,11 +90,11 @@ class Crawler extends EventEmitter {
return charset;
};

// private _getContentType = (headers: Record<string, string>): string[] => {
// let contentType = headers["content-type"];
// if (!contentType) return [];
// return contentType.split(";").map((type: string) => type.trim());
// }
/**
 * Reports whether the `content-type` entry of a headers object looks like
 * HTML or XML markup.
 *
 * The check is a case-insensitive substring match for "xml" or "html", so
 * values such as `text/html; charset=utf-8` or `application/xhtml+xml` pass.
 * When the entry is absent, `RegExp.prototype.test` coerces `undefined` to
 * the string "undefined", which does not match, so the result is `false`.
 *
 * @param headers - Header map; only the `content-type` key is read.
 * @returns `true` when the `content-type` value contains "xml" or "html".
 */
private _checkHtml = (headers: Record<string, string>): boolean =>
  /xml|html/i.test(headers["content-type"]);

private _schedule = async (options: crawlerOptions): Promise<void> => {
this.emit("schedule", options);
Expand Down Expand Up @@ -198,12 +199,17 @@ class Crawler extends EventEmitter {
resError = error;
}

// @todo: jQuery injection

// const injectableTypes = ["html", "xhtml", "text/xml", "application/xml", "+xml"];
// if (this._getContentType(response.headers).some(type => injectableTypes.includes(type))) {
// console.warn("response body is not HTML, skip injecting. Set jQuery to false to suppress this message");
// }
if (options.jQuery === true) {
if (response.body === "" || !this._checkHtml(response.headers)) {
console.warn("response body is not HTML, skip injecting. Set jQuery to false to suppress this message");
} else {
try {
response.$ = load(response.body);
} catch (err) {
console.error(err);
}
}
}

if (options.callback && typeof options.callback === "function") {
return options.callback(resError, response, options.release);
Expand Down
2 changes: 0 additions & 2 deletions src/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,5 @@ export const alignOptions = (options: any): any => {
}
});
cleanObject(gotOptions);
// console.log(gotOptions);
// process.exit(0);
return gotOptions;
};
50 changes: 14 additions & 36 deletions test.js
Original file line number Diff line number Diff line change
@@ -1,38 +1,16 @@
// const crawler = require('./dist/index.js');
// const result = crawler.add({
// url: "http://www.google.com",
// method: "GET",
// incomingEncoding: "utf8",
// callback: (err, res, done) => {
// console.log(response.body)
// }
// });
// console.log(result)
// import got from "got";
// import fs from "fs";
// const result = await got({
// url: "http://www.google.com"
// });
// fs.writeFileSync("result", result.body);
// let a = 5
// console.log(a++ % 6)
const a = {
a: {
q1: 1,
q2: 2,
q3: 3
import { crawler } from './dist/index.js';
crawler.add({
url: 'https://www.google.com',
method: 'GET',
headers: {
'Content-Type': 'application/json'
},
b: {
q1: 1,
q2: 2,
q3: 3
callback: (err, res, done) => {
if (err) {
console.log(err);
} else {
console.log(res.body);
}
done();
}
}
// for(const value of Object.values(a)){
// value.q1++;
// value.q2 = 0;
// delete value.q3;
// }
// console.log(a)
const { q1, q4 } = a.a;
console.log(q1, q4)
});
Loading

0 comments on commit 8276305

Please sign in to comment.