Skip to content

Commit

Permalink
Add Content-Disposition filename parser (#26)
Browse files Browse the repository at this point in the history
Based on several RFCs (referenced in comments) and reading Firefox's
source code. Firefox is the primary target but I also checked the
algorithm against Chrome 62.

In Firefox, I verified the expected behavior by running the following snippet
from the console of an about: page (or the global JS console):

```javascript

(s=>Components.classes["@mozilla.org/network/mime-hdrparam;1"]
.getService(Components.interfaces.nsIMIMEHeaderParam)
.getParameterHTTP(s, 'filename', '', true, {}))
("a;filename=filename.txt")

// Outputs filename.txt. Change the last line to test.

```

Tested by opening test/test-content-disposition.html in the browser and
confirming that the test outputs look reasonable.

The following test expectations fail. This is acceptable since the input
values are erroneous anyway and the exact way of error recovery is not
very important (servers should not send such obviously invalid values).

```
Assertion failed: Input: attachment; filename*=UTF-8''A%e4B
Expected: ""
Actual  : "AäB"
Assertion failed: Input: attachment; filename*=UTF-8''A%e4B; filename=fallback
Expected: "fallback"
Actual  : "AäB"
Assertion failed: Input: attachment; filename*0*=UTF-8''A%e4B; filename=fallback
Expected: "fallback"
Actual  : "AäB"
Assertion failed: Input: attachment; filename*=UTF-8''f%oo; filename=bar
Expected: "bar"
Actual  : "f%oo"
Assertion failed: Input: attachment; filename*=UTF-8''foo%; filename=bar
Expected: "bar"
Actual  : "foo%"
```

Fixes #26
  • Loading branch information
Rob--W committed Nov 27, 2017
1 parent 43e43a3 commit 6f3bbb8
Show file tree
Hide file tree
Showing 5 changed files with 446 additions and 21 deletions.
22 changes: 1 addition & 21 deletions extension/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
* (c) 2013 Rob Wu <[email protected]> (https://robwu.nl)
*/
/* globals Prefs, MimeActions, mime_fromFilename, ModalDialog, ContentHandlers */
/* globals getFilenameFromContentDispositionHeader */
'use strict';

// URL of the extension's bundled dialog.html page, resolved through
// chrome.extension.getURL.
var dialogURL = chrome.extension.getURL('dialog.html');
// Matches a "filename" parameter that is preceded by ';' or a space.
// Capture groups: 1 = optional "*" (as in "filename*="), 2 = optional opening
// quote (" or '), 3 = the value up to the matching closing quote (if any).
var r_contentDispositionFilename = /[; ]filename(\*?)=(["']?)(.+)\2/;

// NOTE(review): the users of these two flags are outside this excerpt;
// presumably they control whether the dialog is forced for upcoming
// requests - confirm against the rest of the file.
var gForceDialog = 0;
var gForceDialogAllFrames = false;
Expand Down Expand Up @@ -269,26 +269,6 @@ function setHeader(headers, headerName, headerValue) {
});
}

/**
 * Pull the file name out of a Content-Disposition header value.
 *
 * The header is matched against r_contentDispositionFilename. For the
 * extended "filename*=" form (RFC 5987) the leading charset'language' prefix
 * is stripped. The result is percent-decoded on a best-effort basis: when the
 * value is not a valid URI component it is returned without decoding.
 *
 * @param {string} contentDisposition
 * @return {string} Filename, if found in the Content-Disposition header
 *     (undefined when the header has no filename parameter).
 */
function getFilenameFromContentDispositionHeader(contentDisposition) {
  const match = r_contentDispositionFilename.exec(contentDisposition);
  if (!match) {
    return undefined;
  }

  // match[1] is "*" for "filename*=" (RFC 5987); drop charset'lang' then.
  const rawName = match[1] ?
    match[3].replace(/^[^']+'[^']*'/, '') :
    match[3];

  try {
    return decodeURIComponent(rawName);
  } catch (e) {
    // URIError: malformed percent-encoding, fall back to the raw value.
    return rawName;
  }
}

/**
* Derive file name from URL
*
Expand Down
172 changes: 172 additions & 0 deletions extension/content-disposition.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
'use strict';
/* exported getFilenameFromContentDispositionHeader */

/**
 * Extract file name from the Content-Disposition HTTP response header.
 *
 * Lookup order (the first form that is present wins):
 *  1. filename*=ext-value             (RFC 5987 / RFC 6266)
 *  2. filename*0=, filename*1*=, ...  (RFC 2231 continuations)
 *  3. filename=value                  (token or quoted-string)
 *
 * Parameter names are matched case-insensitively, and a value may be either
 * a token or a quoted-string (which may contain spaces and semicolons).
 *
 * @param {string} contentDisposition
 * @return {string} Filename, if found in the Content-Disposition header.
 *     An empty string is returned when no file name is found.
 */
function getFilenameFromContentDispositionHeader(contentDisposition) {
  // This parser is designed to be tolerant and accepting of headers that do
  // not comply with the standard, but accepted by Firefox.

  // Becomes false as soon as a value has been decoded with an explicitly
  // declared charset, in which case no UTF-8 guessing happens afterwards.
  let needsEncodingFixup = true;

  // filename*=ext-value ("ext-value" from RFC 5987, referenced by RFC 6266).
  let tmp = toParamRegExp('filename\\*', 'i').exec(contentDisposition);
  if (tmp) {
    let filename = rfc2616unquote(tmp[1]);
    filename = unescape(filename);
    filename = rfc5987decode(filename);
    filename = rfc2047decode(filename);
    return fixupEncoding(filename);
  }

  // Continuations (RFC 2231 section 3, referenced by RFC 5987 section 3.1).
  // filename*n*=part
  // filename*n=part
  tmp = rfc2231getparam(contentDisposition);
  if (tmp) {
    // The joined value may still contain RFC 2047 encoded-words.
    const filename = rfc2047decode(tmp);
    return fixupEncoding(filename);
  }

  // filename=value (RFC 5987, section 4.1).
  tmp = toParamRegExp('filename', 'i').exec(contentDisposition);
  if (tmp) {
    let filename = rfc2616unquote(tmp[1]);
    filename = rfc2047decode(filename);
    return fixupEncoding(filename);
  }
  return '';

  // Only (hoisted) function declarations follow.

  /**
   * Build a regular expression that matches `<attribute> = <value>` at the
   * start of the header or directly after a ';'. The LAST capture group holds
   * the raw value, which is either a token or a (possibly unterminated)
   * quoted-string (RFC 2616 section 3.6, referenced by RFC 6266 section 4.1).
   *
   * @param {string} attributePattern Regex source for the attribute name;
   *     may itself contain capture groups.
   * @param {string} flags RegExp flags.
   * @return {RegExp}
   */
  function toParamRegExp(attributePattern, flags) {
    return new RegExp(
      '(?:^|;)\\s*' + attributePattern + '\\s*=\\s*' +
      '(' +
        // token: anything up to the next ';' or whitespace.
        '[^";\\s][^;\\s]*' +
      '|' +
        // quoted-string: may contain whitespace, ';' and \"-escaped quotes.
        '"(?:[^"\\\\]|\\\\"?)+"?' +
      ')',
      flags);
  }

  /**
   * Interpret the code units of `value` as bytes and decode them with
   * `encoding`. The input is returned unchanged when `encoding` is empty or
   * unknown, when `value` contains a code unit above 0xFF, or when the bytes
   * are not valid in that encoding.
   */
  function textdecode(encoding, value) {
    if (encoding) {
      try {
        const decoder = new TextDecoder(encoding, {fatal: true});
        const bytes = Array.from(value, c => c.charCodeAt(0));
        if (bytes.every(code => code <= 0xFF)) {
          value = decoder.decode(new Uint8Array(bytes));
          needsEncodingFixup = false;
        }
      } catch (e) {
        // TextDecoder constructor threw - unrecognized encoding, or decode()
        // threw because the bytes are invalid for this encoding (fatal mode).
      }
    }
    return value;
  }

  /**
   * Last-resort decoding: when no charset was applied yet and the value
   * contains bytes in the 0x80-0xFF range, try to read it as UTF-8.
   */
  function fixupEncoding(value) {
    if (needsEncodingFixup && /[\x80-\xff]/.test(value)) {
      // Maybe multi-byte UTF-8.
      return textdecode('utf-8', value);
    }
    return value;
  }

  /**
   * Collect and join the RFC 2231 continuation parts (filename*0, filename*1,
   * ...). Returns an empty string when there is no filename*0 part.
   */
  function rfc2231getparam(contentDisposition) {
    const matches = [];
    let match;
    // Iterate over all filename*n= and filename*n*= with n being an integer
    // of at least zero. Any non-zero number must not start with '0'.
    const iter = toParamRegExp('filename\\*((?!0\\d)\\d+)(\\*?)', 'ig');
    while ((match = iter.exec(contentDisposition)) !== null) {
      const [, num, quot, part] = match;
      const n = parseInt(num, 10);
      if (n in matches) {
        // Ignore anything after the invalid second filename*0.
        if (n === 0) {
          break;
        }
        continue;
      }
      matches[n] = [quot, part];
    }
    const parts = [];
    for (let n = 0; n < matches.length; ++n) {
      if (!(n in matches)) {
        // Numbers must be consecutive. Truncate when there is a hole.
        break;
      }
      let [quot, part] = matches[n];
      part = rfc2616unquote(part);
      if (quot) {
        // A trailing "*" marks a percent-encoded part.
        part = unescape(part);
        if (n === 0) {
          // Only the first part carries the charset'language' prefix.
          part = rfc5987decode(part);
        }
      }
      parts.push(part);
    }
    return parts.join('');
  }

  /**
   * Unquote an RFC 2616 quoted-string: strip the surrounding quotes, stop at
   * the first unescaped '"' and resolve backslash escapes. Values that do not
   * start with '"' are returned unchanged.
   */
  function rfc2616unquote(value) {
    if (value.startsWith('"')) {
      const parts = value.slice(1).split('\\"');
      // Find the first unescaped " and terminate there.
      for (let i = 0; i < parts.length; ++i) {
        const quotindex = parts[i].indexOf('"');
        if (quotindex !== -1) {
          parts[i] = parts[i].slice(0, quotindex);
          parts.length = i + 1; // Truncates and stop the iteration.
        }
        parts[i] = parts[i].replace(/\\(.)/g, '$1');
      }
      value = parts.join('"');
    }
    return value;
  }

  /**
   * Decode an (already percent-decoded) RFC 5987 "ext-value" of the form
   * charset'language'value.
   */
  function rfc5987decode(extvalue) {
    const encodingend = extvalue.indexOf('\'');
    if (encodingend === -1) {
      // Some servers send "filename*=" without encoding'language' prefix,
      // e.g. in https://github.com/Rob--W/open-in-browser/issues/26
      // Let's accept the value like Firefox (57) (Chrome 62 rejects it).
      return extvalue;
    }
    const encoding = extvalue.slice(0, encodingend);
    const langvalue = extvalue.slice(encodingend + 1);
    // Ignore language (RFC 5987 section 3.2.1, and RFC 6266 section 4.1 ).
    const value = langvalue.replace(/^[^']*'/, '');
    return textdecode(encoding, value);
  }

  /**
   * Decode RFC 2047 encoded-words ("=?charset?Q|B?text?=") in `value`.
   */
  function rfc2047decode(value) {
    // RFC 2047-decode the result. Firefox tried to drop support for it, but
    // backed out because some servers use it - https://bugzil.la/875615
    // Firefox's condition for decoding is here: https://searchfox.org/mozilla-central/rev/4a590a5a15e35d88a3b23dd6ac3c471cf85b04a8/netwerk/mime/nsMIMEHeaderParamImpl.cpp#742-748

    // We are more strict and only recognize RFC 2047-encoding if the value
    // starts with "=?", since then it is likely that the full value is
    // RFC 2047-encoded.

    // Firefox also decodes words even where RFC 2047 section 5 states:
    // "An 'encoded-word' MUST NOT appear within a 'quoted-string'."

    // eslint-disable-next-line no-control-regex
    if (!value.startsWith('=?') || /[\x00-\x19\x80-\xff]/.test(value)) {
      return value;
    }
    // RFC 2047, section 2.4
    // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
    // charset = token (but let's restrict to characters that denote a
    // possibly valid encoding).
    // encoding = q or b
    // encoded-text = any printable ASCII character other than ? or space.
    // ... but Firefox permits ? and space.
    return value.replace(/=\?([\w\-]*)\?([QqBb])\?((?:[^?]|\?(?!=))*)\?=/g,
      function(_, charset, encoding, text) {
        if (encoding === 'q' || encoding === 'Q') {
          // RFC 2047 section 4.2.
          text = text.replace(/_/g, ' ');
          text = text.replace(/=([0-9a-fA-F]{2})/g,
            (_, hex) => String.fromCharCode(parseInt(hex, 16)));
          return textdecode(charset, text);
        } // else encoding is b or B - base64 (RFC 2047 section 4.1)
        try {
          text = atob(text);
        } catch (e) {
          // Not valid base64 - keep the raw text.
          return text;
        }
        // The base64 payload is a byte sequence in the declared charset.
        return textdecode(charset, text);
      });
  }
}
1 change: 1 addition & 0 deletions extension/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"mime-metadata.js",
"prefs.js",
"dialogmanager.js",
"content-disposition.js",
"content-handlers-firefox.js",
"background.js"
]
Expand Down
4 changes: 4 additions & 0 deletions test/test-content-disposition.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!DOCTYPE html>
<script src="../extension/content-disposition.js"></script>
<script src="test-content-disposition.js"></script>
See the console.
Loading

0 comments on commit 6f3bbb8

Please sign in to comment.