-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Content-Disposition filename parser (#26)
Based on several RFCs (referenced in comments) and reading Firefox's source code. Firefox is the primary target but I also checked the algorithm against Chrome 62. In Firefox, I checked the expected behavior by running the next snippet from the console of an about:-page (or the global JS console): ```javascript (s=>Components.classes["@mozilla.org/network/mime-hdrparam;1"] .getService(Components.interfaces.nsIMIMEHeaderParam) .getParameterHTTP(s, 'filename', '', true, {})) ("a;filename=filename.txt") // Outputs filename.txt. Change the last line to test. ``` Tested by opening test/test-content-disposition.html in the browser and confirming that the test outputs look reasonable. The following test expectations fail. This is acceptable since the input values are erroneous anyway and the exact way of error recovery is not very important (servers should not send such obviously invalid values). ``` Assertion failed: Input: attachment; filename*=UTF-8''A%e4B Expected: "" Actual : "AäB" Assertion failed: Input: attachment; filename*=UTF-8''A%e4B; filename=fallback Expected: "fallback" Actual : "AäB" Assertion failed: Input: attachment; filename*0*=UTF-8''A%e4B; filename=fallback Expected: "fallback" Actual : "AäB" Assertion failed: Input: attachment; filename*=UTF-8''f%oo; filename=bar Expected: "bar" Actual : "f%oo" Assertion failed: Input: attachment; filename*=UTF-8''foo%; filename=bar Expected: "bar" Actual : "foo%" ``` Fixes #26
- Loading branch information
Showing
5 changed files
with
446 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,10 +2,10 @@ | |
* (c) 2013 Rob Wu <[email protected]> (https://robwu.nl) | ||
*/ | ||
/* globals Prefs, MimeActions, mime_fromFilename, ModalDialog, ContentHandlers */ | ||
/* globals getFilenameFromContentDispositionHeader */ | ||
'use strict'; | ||
|
||
// Extension URL of dialog.html, obtained through chrome.extension.getURL. | ||
var dialogURL = chrome.extension.getURL('dialog.html'); | ||
// Matches a "filename" parameter that is preceded by ";" or a space. | ||
// Group 1: optional "*" (the "filename*=" form). | ||
// Group 2: optional opening quote (" or '); the \2 backreference requires the | ||
// same character (or nothing) after the value. | ||
// Group 3: the value itself, matched greedily with ".+". | ||
var r_contentDispositionFilename = /[; ]filename(\*?)=(["']?)(.+)\2/; | ||
|
||
var gForceDialog = 0; | ||
var gForceDialogAllFrames = false; | ||
|
@@ -269,26 +269,6 @@ function setHeader(headers, headerName, headerValue) { | |
}); | ||
} | ||
|
||
/** | ||
* Extract the file name from a Content-Disposition header value by running | ||
* r_contentDispositionFilename against it. | ||
* | ||
* @param {string} contentDisposition Header value to search. | ||
* @return {string} Filename, if found in the Content-Disposition header. | ||
* When the pattern does not match, no return statement is reached and the | ||
* result is undefined. | ||
*/ | ||
function getFilenameFromContentDispositionHeader(contentDisposition) { | ||
// The parameter is reused to hold the match result (an array, or null). | ||
contentDisposition = r_contentDispositionFilename.exec(contentDisposition); | ||
if (contentDisposition) { | ||
// Capture group 3 holds the raw parameter value. | ||
var filename = contentDisposition[3]; | ||
if (contentDisposition[1]) { // "*" in "filename*=" (RFC 5987) | ||
// Drop the leading charset'language' prefix of the extended value. | ||
filename = filename.replace(/^[^']+'[^']*'/, ''); | ||
} | ||
try { | ||
// Percent-decoding is attempted for both "filename=" and "filename*=". | ||
filename = decodeURIComponent(filename); | ||
} catch (e) {/* URIError: the undecoded value is returned instead */} | ||
return filename; | ||
} | ||
} | ||
|
||
/** | ||
* Derive file name from URL | ||
* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
'use strict'; | ||
/* exported getFilenameFromContentDispositionHeader */ | ||
|
||
/**
 * Extract file name from the Content-Disposition HTTP response header.
 *
 * The parameters are tried in this order, and the first one present wins:
 *  1. filename*=ext-value          (RFC 5987, referenced by RFC 6266)
 *  2. filename*n= / filename*n*=   (RFC 2231 continuations)
 *  3. filename=value               (token or quoted-string)
 *
 * Parameter names are matched case-insensitively, and quoted-string values
 * may contain whitespace and semicolons.
 *
 * @param {string} contentDisposition
 * @return {string} Filename, if found in the Content-Disposition header.
 *     An empty string if no filename parameter was found.
 */
function getFilenameFromContentDispositionHeader(contentDisposition) {
  // This parser is designed to be tolerant and accepting of headers that do
  // not comply with the standard, but accepted by Firefox.

  // Set to false by textdecode() once a declared charset has been applied
  // successfully; while true, fixupEncoding() may still try UTF-8.
  let needsEncodingFixup = true;

  // filename*=ext-value ("ext-value" from RFC 5987, referenced by RFC 6266).
  let tmp = toParamRegExp('filename\\*', 'i').exec(contentDisposition);
  if (tmp) {
    let filename = rfc2616unquote(tmp[1]);
    // unescape() (not decodeURIComponent) is used on purpose: it turns %XX
    // into single "byte" characters, which textdecode() then interprets in
    // the declared charset.
    filename = unescape(filename);
    filename = rfc5987decode(filename);
    filename = rfc2047decode(filename);
    return fixupEncoding(filename);
  }

  // Continuations (RFC 2231 section 3, referenced by RFC 5987 section 3.1).
  // filename*n*=part
  // filename*n=part
  tmp = rfc2231getparam(contentDisposition);
  if (tmp) {
    // The joined value may still contain RFC 2047 encoded-words.
    return fixupEncoding(rfc2047decode(tmp));
  }

  // filename=value (RFC 5987, section 4.1).
  tmp = toParamRegExp('filename', 'i').exec(contentDisposition);
  if (tmp) {
    let filename = rfc2616unquote(tmp[1]);
    filename = rfc2047decode(filename);
    return fixupEncoding(filename);
  }
  return '';

  // The helpers below are function declarations, so they are hoisted and can
  // be used above even though they follow the return statement.

  /**
   * Build a RegExp that matches `<attribute> = <value>` at the start of the
   * header or after a ";". The value is captured in the LAST capture group.
   *
   * @param {string} attributePattern RegExp source for the parameter name.
   * @param {string} flags RegExp flags.
   * @return {RegExp} A fresh RegExp (safe to use with the "g" flag).
   */
  function toParamRegExp(attributePattern, flags) {
    return new RegExp(
      '(?:^|;)\\s*' + attributePattern + '\\s*=\\s*' +
      // value = token | quoted-string (RFC 2616 section 3.6).
      '(' +
        // Token-like value: runs until whitespace or ";".
        '[^";\\s][^;\\s]*' +
      '|' +
        // Quoted-string: may contain whitespace, ";" and \" escapes. The
        // closing quote is optional to tolerate truncated headers.
        '"(?:[^"\\\\]|\\\\"?)*"?' +
      ')',
      flags);
  }

  /**
   * Interpret `value` as a string of bytes (one char per byte) and decode it
   * using `encoding`. Returns `value` unchanged if the encoding is empty or
   * unknown, if any char is above 0xFF, or if the bytes are invalid.
   */
  function textdecode(encoding, value) {
    if (encoding) {
      try {
        const decoder = new TextDecoder(encoding, {fatal: true});
        const bytes = Array.from(value, c => c.charCodeAt(0));
        if (bytes.every(code => code <= 0xFF)) {
          value = decoder.decode(new Uint8Array(bytes));
          needsEncodingFixup = false;
        }
      } catch (e) {
        // Either the TextDecoder constructor threw (unrecognized encoding)
        // or decode() threw (invalid byte sequence, because fatal is set).
        // Keep the value as-is in both cases.
      }
    }
    return value;
  }

  /** Try UTF-8 if no charset was applied yet and high bytes are present. */
  function fixupEncoding(value) {
    if (needsEncodingFixup && /[\x80-\xff]/.test(value)) {
      // Maybe multi-byte UTF-8.
      return textdecode('utf-8', value);
    }
    return value;
  }

  /** Collect and join filename*0, filename*1*, ... ('' if there are none). */
  function rfc2231getparam(contentDisposition) {
    const matches = [];
    let match;
    // Iterate over all filename*n= and filename*n*= with n being an integer
    // of at least zero. Any non-zero number must not start with '0'.
    const iter = toParamRegExp('filename\\*((?!0\\d)\\d+)(\\*?)', 'ig');
    while ((match = iter.exec(contentDisposition)) !== null) {
      let [, n, quot, part] = match;
      n = parseInt(n, 10);
      if (n in matches) {
        // Ignore anything after the invalid second filename*0.
        if (n === 0) {
          break;
        }
        continue;
      }
      matches[n] = [quot, part];
    }
    const parts = [];
    for (let n = 0; n < matches.length; ++n) {
      if (!(n in matches)) {
        // Numbers must be consecutive. Truncate when there is a hole.
        break;
      }
      let [quot, part] = matches[n];
      part = rfc2616unquote(part);
      if (quot) {
        // A trailing "*" marks a percent-encoded section.
        part = unescape(part);
        if (n === 0) {
          // Only the first section carries the charset'language' prefix.
          part = rfc5987decode(part);
        }
      }
      parts.push(part);
    }
    return parts.join('');
  }

  /** Strip the quotes and backslash escapes of a quoted-string, if quoted. */
  function rfc2616unquote(value) {
    if (value.startsWith('"')) {
      const parts = value.slice(1).split('\\"');
      // Find the first unescaped " and terminate there.
      for (let i = 0; i < parts.length; ++i) {
        const quotindex = parts[i].indexOf('"');
        if (quotindex !== -1) {
          parts[i] = parts[i].slice(0, quotindex);
          parts.length = i + 1; // Truncates and stops the iteration.
        }
        parts[i] = parts[i].replace(/\\(.)/g, '$1');
      }
      value = parts.join('"');
    }
    return value;
  }

  /** Decode an (already percent-decoded) "ext-value" from RFC 5987. */
  function rfc5987decode(extvalue) {
    const encodingend = extvalue.indexOf('\'');
    if (encodingend === -1) {
      // Some servers send "filename*=" without encoding'language' prefix,
      // e.g. in https://github.com/Rob--W/open-in-browser/issues/26
      // Let's accept the value like Firefox (57) (Chrome 62 rejects it).
      return extvalue;
    }
    const encoding = extvalue.slice(0, encodingend);
    const langvalue = extvalue.slice(encodingend + 1);
    // Ignore language (RFC 5987 section 3.2.1, and RFC 6266 section 4.1 ).
    const value = langvalue.replace(/^[^']*'/, '');
    return textdecode(encoding, value);
  }

  /** Decode RFC 2047 encoded-words if the value starts with "=?". */
  function rfc2047decode(value) {
    // RFC 2047-decode the result. Firefox tried to drop support for it, but
    // backed out because some servers use it - https://bugzil.la/875615
    // Firefox's condition for decoding is here: https://searchfox.org/mozilla-central/rev/4a590a5a15e35d88a3b23dd6ac3c471cf85b04a8/netwerk/mime/nsMIMEHeaderParamImpl.cpp#742-748

    // We are more strict and only recognize RFC 2047-encoding if the value
    // starts with "=?", since then it is likely that the full value is
    // RFC 2047-encoded.

    // Firefox also decodes words even where RFC 2047 section 5 states:
    // "An 'encoded-word' MUST NOT appear within a 'quoted-string'."

    // eslint-disable-next-line no-control-regex
    if (!value.startsWith('=?') || /[\x00-\x19\x80-\xff]/.test(value)) {
      return value;
    }
    // RFC 2047, section 2.4
    // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
    // charset = token (but let's restrict to characters that denote a
    //           possibly valid encoding).
    // encoding = q or b
    // encoded-text = any printable ASCII character other than ? or space.
    //                ... but Firefox permits ? and space.
    return value.replace(/=\?([\w\-]*)\?([QqBb])\?((?:[^?]|\?(?!=))*)\?=/g,
      function(_, charset, encoding, text) {
        if (encoding === 'q' || encoding === 'Q') {
          // RFC 2047 section 4.2.
          text = text.replace(/_/g, ' ');
          text = text.replace(/=([0-9a-fA-F]{2})/g,
            (_, hex) => String.fromCharCode(parseInt(hex, 16)));
          return textdecode(charset, text);
        } // else encoding is b or B - base64 (RFC 2047 section 4.1)
        let bytes;
        try {
          bytes = atob(text);
        } catch (e) {
          // Not valid base64; leave the encoded text untouched.
          return text;
        }
        // The base64 payload is in the declared charset, just like the
        // Q-encoded payload above.
        return textdecode(charset, bytes);
      });
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
<!DOCTYPE html> | ||
<script src="../extension/content-disposition.js"></script> | ||
<script src="test-content-disposition.js"></script> | ||
See the console. |
Oops, something went wrong.