Extract RFC2231 attachment name from message headers (#6729) (#6783)

* Extract RFC2231 attachment name from message headers (#6729)
* Workaround for attachments with invalid content type (e.g. PDF) (#6816)
pull/7195/head
Aleksander Machniak 5 years ago committed by GitHub
parent 1613f3ab4c
commit 97e6065897
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2054,18 +2054,23 @@ class rcube_imap extends rcube_storage
// build parts list for headers pre-fetching
for ($i=0; $i<count($part); $i++) {
if (!is_array($part[$i])) {
break;
}
// fetch message headers if message/rfc822
// or named part (could contain Content-Location header)
if (!is_array($part[$i][0])) {
// fetch message headers if message/rfc822 or named part
if (is_array($part[$i]) && !is_array($part[$i][0])) {
$tmp_part_id = $struct->mime_id ? $struct->mime_id.'.'.($i+1) : $i+1;
if (strtolower($part[$i][0]) == 'message' && strtolower($part[$i][1]) == 'rfc822') {
$mime_part_headers[] = $tmp_part_id;
}
else if (in_array('name', (array)$part[$i][2]) && empty($part[$i][3])) {
$mime_part_headers[] = $tmp_part_id;
else if (!empty($part[$i][2]) && empty($part[$i][3])) {
$params = array_map('strtolower', (array) $part[$i][2]);
$find = array('name', 'filename', 'name*', 'filename*', 'name*0', 'filename*0', 'name*0*', 'filename*0*');
// In case of malformed header check disposition. E.g. some servers for
// "Content-Type: PDF; name=test.pdf" may return text/plain and ignore name argument
if (count(array_intersect($params, $find)) > 0
|| (is_array($part[$i][9]) && stripos($part[$i][9][0], 'attachment') === 0)
) {
$mime_part_headers[] = $tmp_part_id;
}
}
}
}
@ -2113,9 +2118,9 @@ class rcube_imap extends rcube_storage
*/
// regular part
$struct->ctype_primary = strtolower($part[0]);
$struct->ctype_primary = strtolower($part[0]);
$struct->ctype_secondary = strtolower($part[1]);
$struct->mimetype = $struct->ctype_primary.'/'.$struct->ctype_secondary;
$struct->mimetype = $struct->ctype_primary.'/'.$struct->ctype_secondary;
// read content type parameters
if (is_array($part[2])) {
@ -2239,96 +2244,37 @@ class rcube_imap extends rcube_storage
*/
protected function set_part_filename(&$part, $headers = null)
{
if (!empty($part->d_parameters['filename'])) {
$filename_mime = $part->d_parameters['filename'];
}
else if (!empty($part->d_parameters['filename*'])) {
$filename_encoded = $part->d_parameters['filename*'];
}
else if (!empty($part->ctype_parameters['name*'])) {
$filename_encoded = $part->ctype_parameters['name*'];
}
// RFC2231 value continuations
// TODO: this should be rewritten to support RFC2231 4.1 combinations
else if (!empty($part->d_parameters['filename*0'])) {
$i = 0;
while (isset($part->d_parameters['filename*'.$i])) {
$filename_mime .= $part->d_parameters['filename*'.$i];
$i++;
}
// some servers (eg. dovecot-1.x) have no support for parameter value continuations
// we must fetch and parse headers "manually"
if ($i<2) {
if (!$headers) {
$headers = $this->conn->fetchPartHeader(
$this->folder, $this->msg_uid, true, $part->mime_id);
}
$filename_mime = '';
$i = 0;
while (preg_match('/filename\*'.$i.'\s*=\s*"*([^"\n;]+)[";]*/', $headers, $matches)) {
$filename_mime .= $matches[1];
$i++;
}
// Some IMAP servers do not support RFC2231, if we have
// part headers we'll get attachment name from them, not the BODYSTRUCTURE
$rfc2231_params = array();
if (!empty($headers) || !empty($part->headers)) {
if (is_object($headers)) {
$headers = get_object_vars($headers);
}
}
else if (!empty($part->d_parameters['filename*0*'])) {
$i = 0;
while (isset($part->d_parameters['filename*'.$i.'*'])) {
$filename_encoded .= $part->d_parameters['filename*'.$i.'*'];
$i++;
else {
$headers = !empty($headers) ? rcube_mime::parse_headers($headers) : $part->headers;
}
if ($i<2) {
if (!$headers) {
$headers = $this->conn->fetchPartHeader(
$this->folder, $this->msg_uid, true, $part->mime_id);
}
$filename_encoded = '';
$i = 0; $matches = array();
while (preg_match('/filename\*'.$i.'\*\s*=\s*"*([^"\n;]+)[";]*/', $headers, $matches)) {
$filename_encoded .= $matches[1];
$i++;
$tokens = preg_split('/;[\s\r\n\t]*/', $headers['content-type'] . ';' . $headers['content-disposition']);
foreach ($tokens as $token) {
// TODO: Use order defined by the parameter name not order of occurrence in the header
if (preg_match('/^(name|filename)\*([0-9]*)\*?="*([^"]+)"*/i', $token, $matches)) {
$rfc2231_params[strtolower($matches[1])] .= $matches[3];
}
}
}
else if (!empty($part->ctype_parameters['name*0'])) {
$i = 0;
while (isset($part->ctype_parameters['name*'.$i])) {
$filename_mime .= $part->ctype_parameters['name*'.$i];
$i++;
}
if ($i<2) {
if (!$headers) {
$headers = $this->conn->fetchPartHeader(
$this->folder, $this->msg_uid, true, $part->mime_id);
}
$filename_mime = '';
$i = 0; $matches = array();
while (preg_match('/\s+name\*'.$i.'\s*=\s*"*([^"\n;]+)[";]*/', $headers, $matches)) {
$filename_mime .= $matches[1];
$i++;
}
}
if (isset($rfc2231_params['name'])) {
$filename_encoded = $rfc2231_params['name'];
}
else if (!empty($part->ctype_parameters['name*0*'])) {
$i = 0;
while (isset($part->ctype_parameters['name*'.$i.'*'])) {
$filename_encoded .= $part->ctype_parameters['name*'.$i.'*'];
$i++;
}
if ($i<2) {
if (!$headers) {
$headers = $this->conn->fetchPartHeader(
$this->folder, $this->msg_uid, true, $part->mime_id);
}
$filename_encoded = '';
$i = 0; $matches = array();
while (preg_match('/\s+name\*'.$i.'\*\s*=\s*"*([^"\n;]+)[";]*/', $headers, $matches)) {
$filename_encoded .= $matches[1];
$i++;
}
}
else if (isset($rfc2231_params['filename'])) {
$filename_encoded = $rfc2231_params['filename'];
}
else if (!empty($part->d_parameters['filename'])) {
$filename_mime = $part->d_parameters['filename'];
}
// read 'name' after rfc2231 parameters as it may contains truncated filename (from Thunderbird)
// read 'name' after rfc2231 parameters as it may contain truncated filename (from Thunderbird)
else if (!empty($part->ctype_parameters['name'])) {
$filename_mime = $part->ctype_parameters['name'];
}
@ -2341,7 +2287,7 @@ class rcube_imap extends rcube_storage
}
// decode filename
if (!empty($filename_mime)) {
if (isset($filename_mime)) {
if (!empty($part->charset)) {
$charset = $part->charset;
}
@ -2354,7 +2300,7 @@ class rcube_imap extends rcube_storage
$part->filename = rcube_mime::decode_mime_string($filename_mime, $charset);
}
else if (!empty($filename_encoded)) {
else if (isset($filename_encoded)) {
// decode filename according to RFC 2231, Section 4
if (preg_match("/^([^']*)'[^']*'(.*)$/", $filename_encoded, $fmatches)) {
$filename_charset = $fmatches[1];
@ -2363,6 +2309,18 @@ class rcube_imap extends rcube_storage
$part->filename = rcube_charset::convert(urldecode($filename_encoded), $filename_charset);
}
// Workaround for invalid Content-Type (#6816)
// Some servers for "Content-Type: PDF; name=test.pdf" may return text/plain and ignore name argument
if ($part->mimetype == 'text/plain' && !empty($headers['content-type'])) {
$tokens = preg_split('/;[\s\r\n\t]*/', $headers['content-type']);
$type = rcube_mime::fix_mimetype($tokens[0]);
if ($type != $part->mimetype) {
$part->mimetype = $type;
list($part->ctype_primary, $part->ctype_secondary) = explode('/', $part->mimetype);
}
}
}
/**

@ -893,4 +893,37 @@ class rcube_mime
return implode('@', $parts);
}
/**
 * Fix mimetype name.
 *
 * Trims and lower-cases the given Content-Type value, then maps known
 * aliases and malformed variants to a canonical mimetype. Values that
 * match none of the rules are returned in their normalized
 * (trimmed, lower-cased) form.
 *
 * @param string $type Mimetype
 *
 * @return string Mimetype
 */
public static function fix_mimetype($type)
{
    // Cast first so a missing (null) type does not reach trim()
    $type = strtolower(trim((string) $type));

    $aliases = array(
        'image/x-ms-bmp' => 'image/bmp',       // #4771
        'pdf'            => 'application/pdf', // #6816
    );

    // isset() instead of a direct read: most types have no alias, and reading
    // a missing key raises an "undefined index/array key" notice or warning
    if (isset($aliases[$type])) {
        return $aliases[$type];
    }

    // Some versions of Outlook create garbage Content-Type:
    // application/pdf.A520491B_3BF7_494D_8855_7FAC2C6C0608
    if (preg_match('/^application\/pdf.+/', $type)) {
        return 'application/pdf';
    }

    // treat image/pjpeg (image/pjpg, image/jpg) as image/jpeg (#4196)
    if (preg_match('/^image\/p?jpe?g$/', $type)) {
        return 'image/jpeg';
    }

    return $type;
}
}

@ -1046,7 +1046,7 @@ function rcmail_part_image_type($part)
// Content-Type: image/*...
if (strpos($mimetype, 'image/') === 0) {
return rcmail_fix_mimetype($mimetype);
return $mimetype;
}
// Many clients use application/octet-stream, we'll detect mimetype
@ -1552,31 +1552,6 @@ function rcmail_identity_select($MESSAGE, $identities = null, $compose_mode = 'r
return rcmail_sendmail::identity_select($MESSAGE, $identities, $compose_mode);
}
/**
 * Fixes some content-type names.
 *
 * Lower-cases the given content-type and maps known aliases and malformed
 * variants to a canonical mimetype. A name that matches none of the rules
 * is returned lower-cased.
 *
 * @param string $name Content-type name
 *
 * @return string Fixed content-type name
 */
function rcmail_fix_mimetype($name)
{
    $map = array(
        'image/x-ms-bmp' => 'image/bmp', // #1490282
    );

    // Cast first so a missing (null) name does not reach strtolower()
    $name = strtolower((string) $name);

    // isset() instead of a direct read: most names have no alias, and reading
    // a missing key raises an "undefined index/array key" notice or warning
    if (isset($map[$name])) {
        return $map[$name];
    }

    // Some versions of Outlook create garbage Content-Type:
    // application/pdf.A520491B_3BF7_494D_8855_7FAC2C6C0608
    if (preg_match('/^application\/pdf.+/', $name)) {
        return 'application/pdf';
    }

    // treat image/pjpeg (image/pjpg, image/jpg) as image/jpeg (#1489097)
    if (preg_match('/^image\/p?jpe?g$/', $name)) {
        return 'image/jpeg';
    }

    return $name;
}
// return attachment filename, handle empty filename case
function rcmail_attachment_name($attachment, $display = false)
{

@ -160,7 +160,7 @@ if (empty($_GET['_thumb']) && $attachment->is_valid()) {
}
// "fix" real mimetype the same way the original is before comparison
$real_mimetype = rcmail_fix_mimetype($real_mimetype);
$real_mimetype = rcube_mime::fix_mimetype($real_mimetype);
$valid = $valid_extension && rcmail_mimetype_compare($real_mimetype, $mimetype);
}
@ -446,7 +446,7 @@ class rcmail_attachment_handler
// check connection status
self::check_storage_status();
$this->mimetype = rcmail_fix_mimetype($this->mimetype);
$this->mimetype = rcube_mime::fix_mimetype($this->mimetype);
}
/**

@ -194,7 +194,7 @@ function rcmail_message_attachments($attrib)
foreach ($MESSAGE->attachments as $attach_prop) {
$filename = rcmail_attachment_name($attach_prop, true);
$filesize = $RCMAIL->message_part_size($attach_prop);
$mimetype = rcmail_fix_mimetype($attach_prop->mimetype);
$mimetype = $attach_prop->mimetype;
$class = rcube_utils::file2class($mimetype, $filename);
$id = 'attach' . $attach_prop->mime_id;

Loading…
Cancel
Save