Skip to content

Commit

Permalink
Merge pull request #16376 from uberbrady/improve_safety_csv_charset_d…
Browse files Browse the repository at this point in the history
…etection

Add some safeties around the charset-detection and transliteration
  • Loading branch information
snipe authored Feb 27, 2025
2 parents 23ce54e + 646e3e8 commit 3928c8a
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 13 deletions.
42 changes: 29 additions & 13 deletions app/Http/Controllers/Api/ImportController.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,25 +66,41 @@ public function store() : JsonResponse
if (! ini_get('auto_detect_line_endings')) {
ini_set('auto_detect_line_endings', '1');
}
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
$encoding = $detector->getEncoding($file_contents);
$reader = null;
if (strcasecmp($encoding, 'UTF-8') != 0) {
$transliterated = iconv($encoding, 'UTF-8', $file_contents);
if ($transliterated !== false) {
$tmpname = tempnam(sys_get_temp_dir(), '');
$tmpresults = file_put_contents($tmpname, $transliterated);
if ($tmpresults !== false) {
if (function_exists('iconv')) {
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
$encoding = $detector->getEncoding($file_contents);
\Log::warning("Discovered encoding: $encoding in uploaded CSV");
$reader = null;
if (strcasecmp($encoding, 'UTF-8') != 0) {
$transliterated = false;
try {
$transliterated = iconv(strtoupper($encoding), 'UTF-8', $file_contents);
} catch (\Exception $e) {
$transliterated = false; //blank out the partially-decoded string
return response()->json(
Helper::formatStandardApiResponse(
'error',
null,
trans('admin/hardware/message.import.transliterate_failure', ["encoding" => $encoding])
),
422
);
}
if ($transliterated !== false) {
$tmpname = tempnam(sys_get_temp_dir(), '');
$tmpresults = file_put_contents($tmpname, $transliterated);
$transliterated = null; //save on memory?
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
if ($newfile->isValid()) {
$file = $newfile;
if ($tmpresults !== false) {
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
if ($newfile->isValid()) {
$file = $newfile;
}
}
}
}
$file_contents = null; //try to save on memory, I guess?
}
$reader = Reader::createFromFileObject($file->openFile('r')); //file pointer leak?
$file_contents = null; //try to save on memory, I guess?
try {
$import->header_row = $reader->fetchOne(0);
Expand Down
1 change: 1 addition & 0 deletions resources/lang/en-US/admin/hardware/message.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
'file_already_deleted' => 'The file selected was already deleted',
'header_row_has_malformed_characters' => 'One or more attributes in the header row contain malformed UTF-8 characters',
'content_row_has_malformed_characters' => 'One or more attributes in the first row of content contain malformed UTF-8 characters',
'transliterate_failure' => 'Transliteration from :encoding to UTF-8 failed due to invalid characters in input'
],


Expand Down
20 changes: 20 additions & 0 deletions tests/Feature/Importing/Ui/ImportTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,24 @@ public function testStoreInternationalAsset(): void
]);
$this->assertEquals($evil_string, $results->json()['files'][0]['first_row'][0]);
}

public function testStoreInternationalAssetMisparse(): void
{
$evil_maker = function ($arr) {
$results = '';
foreach ($arr as $thing) {
$results .= chr($thing);
}
return $results;
};

// 0xC0 makes it 'not unicode', and 0xFF makes it 'likely WINDOWS-1251', and 0x98 at the end makes it 'not-valid-Windows-1251'
$evil_content = $evil_maker([0xC0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x98]);

$this->actingAsForApi(User::factory()->superuser()->create());
$results = $this->post(route('api.imports.store'), ['files' => [UploadedFile::fake()->createWithContent("myname.csv", $evil_content)]])
->assertStatus(422)
->assertStatusMessageIs('error')
->assertMessagesAre(trans('admin/hardware/message.import.transliterate_failure', ["encoding" => "windows-1251"]));
}
}

0 comments on commit 3928c8a

Please sign in to comment.