Skip to content

Commit

Permalink
feat: add in charset, fetch, validate
Browse files Browse the repository at this point in the history
  • Loading branch information
willfarrell committed Aug 17, 2022
1 parent 70834bd commit 5c4add7
Show file tree
Hide file tree
Showing 33 changed files with 1,770 additions and 168 deletions.
2 changes: 1 addition & 1 deletion lerna.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"packages": ["packages/*"],
"useNx": false,
"version": "0.0.2"
"version": "0.0.3"
}
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@datastream/monorepo",
"version": "0.0.2",
"version": "0.0.3",
"description": "Streams made easy.",
"type": "module",
"engines": {
Expand All @@ -13,7 +13,7 @@
"pre-commit": "lint-staged",
"install": "lerna bootstrap",
"lint": "lint-staged",
"test": "npm run build && node --test --conditions=node packages && node --test --conditions=webstream packages",
"test": "npm run build && c8 node --test --conditions=node packages && c8 node --test --conditions=webstream packages",
"build": "bin/esbuild",
"release:tag": "git tag $npm_package_version && git push --tags",
"lerna:rm": "npm run lerna:rm:node_modules && npm run lerna:rm:lock",
Expand All @@ -22,7 +22,7 @@
"lerna:update": "lerna exec --bail --concurrency 5 npm update && npm install",
"lerna:outdated": "lerna exec --concurrency 5 npm outdated",
"lerna:audit": "lerna exec --concurrency 2 npm audit fix",
"lerna:sync": "lerna exec --bail --concurrency 2 npm install && lerna publish --exact --yes --skip-npm --skip-git --repo-version $npm_package_version",
"lerna:sync": "lerna publish --exact --yes --skip-npm --skip-git --repo-version $npm_package_version",
"lerna:publish": "lerna publish --exact --yes --skip-git --repo-version $npm_package_version",
"lerna:publish:next": "lerna publish --exact --yes --skip-git --repo-version $npm_package_version --dist-tag next"
},
Expand Down
14 changes: 14 additions & 0 deletions packages/charset/decode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { createTransformStream } from '@datastream/core'
import iconv from 'iconv-lite' // doesn't support esm

/**
 * Build a stream that decodes data from the given charset.
 *
 * The requested name is first resolved through `getSupportedEncoding`.
 * When the resolved name is exactly 'UTF-8' a default
 * `createTransformStream()` is returned instead of an iconv decoder.
 *
 * @param {string} charset - requested source charset name
 * @returns {*} `createTransformStream()` for 'UTF-8', otherwise
 *   `iconv.decodeStream(resolvedCharset)`
 */
export const charsetDecodeStream = (charset) => {
  const encoding = getSupportedEncoding(charset)
  return encoding === 'UTF-8'
    ? createTransformStream()
    : iconv.decodeStream(encoding)
}
/**
 * Resolve a requested charset name to one that iconv-lite can handle.
 *
 * 'ISO-8859-8-I' is mapped to 'ISO-8859-8'; any name for which
 * `iconv.encodingExists` reports false falls back to 'UTF-8'.
 *
 * @param {string} charset - requested charset name
 * @returns {string} the (possibly remapped) charset, or 'UTF-8'
 */
const getSupportedEncoding = (charset) => {
  const candidate = charset === 'ISO-8859-8-I' ? 'ISO-8859-8' : charset
  return iconv.encodingExists(candidate) ? candidate : 'UTF-8'
}
export default charsetDecodeStream
54 changes: 54 additions & 0 deletions packages/charset/detect.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { createTransformStream } from '@datastream/core'
import detect from 'charset-detector'

// Running confidence total per charset name, keyed by the names that
// `charset-detector` reports in `match.charsetName`. Every entry starts at 0.
// NOTE: the key was previously misspelled 'KOIR8-R'; the charset is 'KOI8-R'.
// With the misspelling, a KOI8-R match added a number to `undefined` (NaN).
const charsets = {
  'UTF-8': 0,
  'UTF-16BE': 0,
  'UTF-16LE': 0,
  'UTF-32BE': 0,
  'UTF-32LE': 0,
  Shift_JIS: 0,
  'ISO-2022-JP': 0,
  'ISO-2022-CN': 0,
  'ISO-2022-KR': 0,
  GB18030: 0,
  'EUC-JP': 0,
  'EUC-KR': 0,
  Big5: 0,
  'ISO-8859-1': 0,
  'ISO-8859-2': 0,
  'ISO-8859-5': 0,
  'ISO-8859-6': 0,
  'ISO-8859-7': 0,
  'ISO-8859-8-I': 0,
  'ISO-8859-8': 0,
  'windows-1251': 0,
  'windows-1256': 0,
  'windows-1252': 0,
  'windows-1254': 0,
  'windows-1250': 0,
  'KOI8-R': 0,
  'ISO-8859-9': 0
}

/**
 * Create a stream that runs charset detection on every chunk and keeps a
 * running confidence total per charset.
 *
 * Each call gets its own tally (seeded from `charsets`), so separate streams
 * no longer contaminate each other through the shared module-level object.
 * Charset names reported by the detector that are not in the seed table are
 * added on the fly instead of turning the total into NaN.
 *
 * @param {*} result - not read by this function; kept for signature compatibility
 * @param {object} [options] - forwarded unchanged to `createTransformStream`
 * @param {string} [options.key='charset'] - key reported by `stream.result()`
 * @returns {*} the stream from `createTransformStream`, with an added
 *   `result()` method returning `{ key, value: { charset, confidence } }`
 *   for the charset with the highest accumulated confidence
 */
export const charsetDetectStream = (result, options = { key: 'charset' }) => {
  // Default the key even when an options object without `key` is supplied.
  const { key = 'charset' } = options ?? {}
  // Per-stream copy: the seed table itself is never mutated.
  const totals = { ...charsets }

  const transform = (chunk) => {
    for (const { charsetName, confidence } of detect(chunk)) {
      totals[charsetName] = (totals[charsetName] ?? 0) + confidence
    }
  }

  const stream = createTransformStream(transform, options)
  stream.result = () => {
    const [best] = Object.entries(totals)
      .map(([charset, confidence]) => ({ charset, confidence }))
      .sort((a, b) => b.confidence - a.confidence)
    return { key, value: best }
  }
  return stream
}

export default charsetDetectStream
14 changes: 14 additions & 0 deletions packages/charset/encode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { createTransformStream } from '@datastream/core'
import iconv from 'iconv-lite' // doesn't support esm

/**
 * Build a stream that encodes data into the given charset.
 *
 * The requested name is resolved through `getSupportedEncoding`. Any resolved
 * name other than exactly 'UTF-8' gets an iconv encoder; 'UTF-8' gets a
 * default `createTransformStream()`.
 *
 * @param {string} charset - requested target charset name
 * @returns {*} `iconv.encodeStream(resolvedCharset)`, or
 *   `createTransformStream()` when the resolved charset is 'UTF-8'
 */
export const charsetEncodeStream = (charset) => {
  const target = getSupportedEncoding(charset)
  if (target !== 'UTF-8') {
    return iconv.encodeStream(target)
  }
  return createTransformStream()
}
/**
 * Pick the charset name to hand to iconv-lite.
 *
 * 'ISO-8859-8-I' is treated as 'ISO-8859-8'. If `iconv.encodingExists`
 * rejects the resulting name, 'UTF-8' is returned instead.
 *
 * @param {string} charset - requested charset name
 * @returns {string} the (possibly remapped) charset, or 'UTF-8'
 */
const getSupportedEncoding = (charset) => {
  const name = charset === 'ISO-8859-8-I' ? 'ISO-8859-8' : charset
  if (iconv.encodingExists(name)) return name
  return 'UTF-8'
}
export default charsetEncodeStream
13 changes: 13 additions & 0 deletions packages/charset/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import detectStream from '@datastream/charset/detect'
import decodeStream from '@datastream/charset/decode'
import encodeStream from '@datastream/charset/encode'

// Named exports: each imported stream factory is re-exported under its
// `charset`-prefixed name.
export const charsetDetectStream = detectStream
export const charsetDecodeStream = decodeStream
export const charsetEncodeStream = encodeStream

// Default export: the same three factories, keyed by their local import names
// (`detectStream`, `decodeStream`, `encodeStream`), i.e. without the
// `charset` prefix used by the named exports.
export default {
detectStream,
decodeStream,
encodeStream
}
102 changes: 102 additions & 0 deletions packages/charset/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

135 changes: 135 additions & 0 deletions packages/charset/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
{
"name": "@datastream/charset",
"version": "0.0.3",
"description": "",
"type": "module",
"engines": {
"node": ">=18"
},
"engineStrict": true,
"publishConfig": {
"access": "public"
},
"main": "./index.web.mjs",
"module": "./index.web.mjs",
"exports": {
".": {
"node": {
"webstream": {
"types": "./index.d.ts",
"default": "./index.web.mjs"
},
"import": {
"types": "./index.d.ts",
"default": "./index.node.mjs"
},
"require": {
"types": "./index.d.ts",
"default": "./index.node.cjs"
}
},
"import": {
"types": "./index.d.ts",
"default": "./index.web.mjs"
}
},
"./detect": {
"node": {
"webstream": {
"types": "./detect.d.ts",
"default": "./detect.web.mjs"
},
"import": {
"types": "./detect.d.ts",
"default": "./detect.node.mjs"
},
"require": {
"types": "./detect.d.ts",
"default": "./detect.node.cjs"
}
},
"import": {
"types": "./detect.d.ts",
"default": "./detect.web.mjs"
}
},
"./decode": {
"node": {
"webstream": {
"types": "./decode.d.ts",
"default": "./decode.web.mjs"
},
"import": {
"types": "./decode.d.ts",
"default": "./decode.node.mjs"
},
"require": {
"types": "./decode.d.ts",
"default": "./decode.node.cjs"
}
},
"import": {
"types": "./decode.d.ts",
"default": "./decode.web.mjs"
}
},
"./encode": {
"node": {
"webstream": {
"types": "./encode.d.ts",
"default": "./encode.web.mjs"
},
"import": {
"types": "./encode.d.ts",
"default": "./encode.node.mjs"
},
"require": {
"types": "./encode.d.ts",
"default": "./encode.node.cjs"
}
},
"import": {
"types": "./encode.d.ts",
"default": "./encode.web.mjs"
}
}
},
"types": "index.d.ts",
"files": [
"*.mjs",
"*.cjs",
"*.map",
"*.d.ts"
],
"scripts": {
"test": "npm run test:unit",
"test:unit": "ava",
"test:benchmark": "node __benchmarks__/index.js"
},
"license": "MIT",
"keywords": [
"Web Stream API",
"Node Stream API"
],
"author": {
"name": "datastream contributors",
"url": "https://github.com/willfarrell/datastream/graphs/contributors"
},
"repository": {
"type": "git",
"url": "github:willfarrell/datastream",
"directory": "packages/charset"
},
"bugs": {
"url": "https://github.com/willfarrell/datastream/issues"
},
"homepage": "https://datastream.js.org",
"dependencies": {
"@datastream/core": "0.0.3",
"charset-detector": "0.0.2"
},
"devDependencies": {
"@datastream/charset": "0.0.2"
},
"gitHead": "70834bdf6ea9d690ca90a079371b73a0c7ea4a14"
}
Loading

0 comments on commit 5c4add7

Please sign in to comment.