From d45cd4ea62c0aaca2c55e773ab4fdaa9e2ed8961 Mon Sep 17 00:00:00 2001 From: chenc <1458513@qq.com> Date: Fri, 12 May 2023 09:07:47 +0800 Subject: [PATCH] init code --- .idea/.gitignore | 8 + .idea/mime-type-sniffer.iml | 12 + .idea/misc.xml | 6 + .idea/modules.xml | 8 + .idea/php.xml | 20 ++ .idea/vcs.xml | 6 + composer.json | 24 ++ src/MagicNumber.php | 99 +++++++++ src/MimeTypeSniffer.php | 426 ++++++++++++++++++++++++++++++++++++ src/OfficeDocType.php | 18 ++ src/OfficeExtensionType.php | 65 ++++++ 11 files changed, 692 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/mime-type-sniffer.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/php.xml create mode 100644 .idea/vcs.xml create mode 100644 composer.json create mode 100644 src/MagicNumber.php create mode 100644 src/MimeTypeSniffer.php create mode 100644 src/OfficeDocType.php create mode 100644 src/OfficeExtensionType.php diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..35410ca --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/mime-type-sniffer.iml b/.idea/mime-type-sniffer.iml new file mode 100644 index 0000000..4980e63 --- /dev/null +++ b/.idea/mime-type-sniffer.iml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..3ce3588 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..4159e82 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/php.xml b/.idea/php.xml new file mode 100644 index 0000000..6f95ace --- /dev/null +++ b/.idea/php.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..c8397c9 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..388f0d0 --- /dev/null +++ b/composer.json @@ -0,0 +1,24 @@ +{ + "name": "aix/mime-type-sniffer", + "description": "jsaix mime-type-sniffer", + "version": "1.0.0", + "type": "libs", + "license": "JSAIX LICENSE", + "authors": [ + { + "name": "chenc", + "email": "1458513@qq.com" + } + ], + "require": { + "php": ">=7.3.0" + }, + "autoload": { + "classmap": [ + "src/" + ], + "psr-4": { + "MimeTypeSniffer\\": "src/" + } + } +} \ No newline at end of file diff --git a/src/MagicNumber.php b/src/MagicNumber.php new file mode 100644 index 0000000..44f20c0 --- /dev/null +++ b/src/MagicNumber.php @@ -0,0 +1,99 @@ +mimeType = $mimeType; + $this->magic = $magic; + $this->magicLength = strlen($magic); + $this->isString = $isString; + if ($mask && (strlen($mask) !== strlen($magic))) { + throw new \RuntimeException("magic and mask sizes must be equal"); + } + $this->mask = $mask; + } + + /** + * @return string + */ + public function getMimeType() + { + return $this->mimeType; + } + + /** + * @return string + */ + public function getMagic() + { + return $this->magic; + } + + /** + * @return bool + */ + public function isString() + { + return $this->isString; + } + + /** + * @return string + */ + public function getMask() + { + return $this->mask; + } + + /** + * @return int + */ + public function getMagicLength() + { + return $this->magicLength; + } + + +} \ No newline at end of file diff --git a/src/MimeTypeSniffer.php b/src/MimeTypeSniffer.php new file mode 100644 index 0000000..c9702bc --- /dev/null +++ b/src/MimeTypeSniffer.php @@ -0,0 +1,426 @@ +magicNumbers = [ + new MagicNumber("application/pdf", "%PDF-"), + new MagicNumber("application/postscript", "%!PS-Adobe-"), + new MagicNumber("image/gif", "GIF87a"), + new MagicNumber("image/gif", "GIF89a"), + new MagicNumber("image/png", "\x89" . "PNG\x0D\x0A\x1A\x0A"), + new MagicNumber("image/jpeg", "\xFF\xD8\xFF"), + new MagicNumber("image/bmp", "BM"), + // Source: Mozilla + new MagicNumber("text/plain", "#!"), // Script + new MagicNumber("text/plain", "%!"), // Script, similar to PS + new MagicNumber("text/plain", "From"), + new MagicNumber("text/plain", ">From"), + // Chrome specific + new MagicNumber("application/x-gzip", "\x1F\x8B\x08"), + new MagicNumber("audio/x-pn-realaudio", "\x2E\x52\x4D\x46"), + new MagicNumber("video/x-ms-asf", + "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C"), + new MagicNumber("image/tiff", "I I"), + new MagicNumber("image/tiff", "II*"), + new MagicNumber("image/tiff", "MM\x00*"), + new MagicNumber("audio/mpeg", "ID3"), + new MagicNumber("image/webp", "RIFF....WEBPVP"), + new MagicNumber("video/webm", "\x1A\x45\xDF\xA3"), + new MagicNumber("application/zip", "PK\x03\x04"), + new MagicNumber("application/x-rar-compressed", "Rar!\x1A\x07\x00"), + new MagicNumber("application/x-msmetafile", "\xD7\xCD\xC6\x9A"), + new MagicNumber("application/octet-stream", "MZ"), + + new MagicNumber("application/x-chrome-extension", "Cr24\x02\x00\x00\x00"), + new MagicNumber("application/x-chrome-extension", "Cr24\x03\x00\x00\x00"), + + // Sniffing for Flash: + // + // new MagicNumber("application/x-shockwave-flash", "CWS"), + // new MagicNumber("application/x-shockwave-flash", "FLV"), + // new MagicNumber("application/x-shockwave-flash", "FWS"), + // + // Including these magic number for Flash is a trade off. + // + // Pros: + // * Flash is an important and popular file format + // + // Cons: + // * These patterns are fairly weak + // * If we mistakenly decide something is Flash, we will execute it + // in the origin of an unsuspecting site. This could be a security + // vulnerability if the site allows users to upload content. + // + // On balance, we do not include these patterns. + ]; + + + $this->officeMagicNumbers = [ + new MagicNumber("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"), + new MagicNumber("OOXML", "PK\x03\x04"), + ]; + + + $this->magicXMLOrHTML = [ + new MagicNumber("application/atom+xml", "magicXMLOrHTML[] = new MagicNumber("text/html", "<" . $htmlTag, true); + } + + + $this->extraMagicNumbers = [ + new MagicNumber("image/x-xbitmap", "#define"), + new MagicNumber("image/x-icon", "\x00\x00\x01\x00"), + new MagicNumber("image/svg+xml", "officeExtensionTypes = [ + new OfficeExtensionType(OfficeDocType::WORD, ".doc"), + new OfficeExtensionType(OfficeDocType::EXCEL, ".xls"), + new OfficeExtensionType(OfficeDocType::POWERPOINT, ".ppt"), + new OfficeExtensionType(OfficeDocType::WORD, ".docx"), + new OfficeExtensionType(OfficeDocType::EXCEL, ".xlsx"), + new OfficeExtensionType(OfficeDocType::POWERPOINT, ".pptx"), + ]; + + } + + /** + * MimeTypeSniffer constructor. + */ + public function __construct() + { + $this->initialize(); + } + + + /** + * @param string $magic + * @param string $content + * @param int $len + * @return bool + */ + private function magicCmp($magic, $content, $len) + { + $cursor = 0; + while ($len) { + $magicChar = ord(substr($magic, $cursor, 1)); + $contentChar = ord(substr($content, $cursor, 1)); + if (($magicChar !== ord(".")) && ($magicChar !== $contentChar)) { + return false; + } + $cursor++; + $len--; + } + return true; + } + + + /** + * @param string $magic + * @param string $content + * @param int $len + * @param string $mask + * @return bool + */ + private function magicMaskCmp($magic, $content, $len, $mask) + { + $cursor = 0; + while ($len) { + $magicChar = ord(substr($magic, $cursor, 1)); + $contentChar = ord(substr($content, $cursor, 1)); + $maskChar = ord(substr($mask, $cursor, 1)); + if (($magicChar !== ord(".")) && ($magicChar !== ($maskChar & $contentChar))) { + return false; + } + $cursor++; + $len--; + } + return true; + } + + /** + * @param string $content + * @param int $size + * @param MagicNumber $magicNumber + * @param string $result + * @return bool + */ + private function matchMagicNumber($content, $size, $magicNumber, &$result) + { + $len = $magicNumber->getMagicLength(); + $match = false; + + if ($magicNumber->isString()) { + $match = strcmp(strtolower(substr($content, 0, $len)), $magicNumber->getMagic()) === 0; + } else { + if ($size > $len) { + if ($magicNumber->getMask()) { + $match = $this->magicMaskCmp($magicNumber->getMagic(), $content, $len, $magicNumber->getMask()); + } else { + $match = $this->magicCmp($magicNumber->getMagic(), $content, $len); + } + } + } + + if ($match) { + $result = $magicNumber->getMimeType(); + return true; + } + + return false; + } + + /** + * @param string $content + * @param int $size + * @param MagicNumber[] $magicNumbers + * @param string $result + * @return bool + */ + private function checkForMagicNumbers($content, $size, $magicNumbers, &$result) + { + foreach ($magicNumbers as $magicNumber) { + if ($this->matchMagicNumber($content, $size, $magicNumber, $result)) { + return true; + } + } + return false; + } + + + private function getEnoughContent($path) + { + $filename = $path; + $handle = fopen($filename, "r"); + $content = fread($handle, $this->enoughContentByteSize); + fclose($handle); + return $content; + } + + + /** + * @param $path + * @param $result + * @param $filename + * @return bool + */ + public function sniffMimeType($path, &$result, $filename) + { + $content = $this->getEnoughContent($path); + $result = "application/unknown"; + + if ($this->sniffForOfficeDocs($content, $filename, $result)) { + return true; + } + if ($this->sniffForMagicNumbers($content, $result)) { + return true; + } + if ($this->sniffForExtraMagicNumbers($content, $result)) { + return true; + } + if ($this->sniffForXMLOrHTML($content, $result)) { + return true; + } + + return $this->sniffBinary($content, $result); + } + + + private function sniffForMagicNumbers($content, &$result) + { + return $this->checkForMagicNumbers($content, strlen($content), $this->magicNumbers, $result); + } + + private function sniffForExtraMagicNumbers($content, &$result) + { + return $this->checkForMagicNumbers($content, strlen($content), $this->extraMagicNumbers, $result); + } + + private function looksLikeBinary($content) + { + // The definition of "binary bytes" is from the spec at + // https://mimesniff.spec.whatwg.org/#binary-data-byte + // + // The bytes which are considered to be "binary" are all < 0x20. Encode them + // one bit per byte, with 1 for a "binary" bit, and 0 for a "text" bit. The + // least-significant bit represents byte 0x00, the most-significant bit + // represents byte 0x1F. + $kBinaryBits = ~(1 << ord("\t") | 1 << ord("\n") | 1 << ord("\r") | 1 << ord("\f") | 1 << ord("\x1b")); + for ($i = 0; $i < strlen($content); $i++) { + $byte = ord(substr($content, $i, 1)); + if ($byte < 0x20 && ($kBinaryBits & (1 << $byte))) { + return true; + } + } + return false; + } + + private function sniffBinary($content, &$result) + { + $byteOrderMark = [ + new MagicNumber("text/plain", "\xFE\xFF"), // UTF-16BE + new MagicNumber("text/plain", "\xFF\xFE"), // UTF-16LE + new MagicNumber("text/plain", "\xEF\xBB\xBF"), // UTF-8 + ]; + if ($this->checkForMagicNumbers($content, strlen($content), $byteOrderMark, $result)) { + return false; + } + if ($this->looksLikeBinary($content)) { + $result = "application/octet-stream"; + return true; + } + $result = "text/plain"; + return false; + } + + + private function sniffForXMLOrHTML($content, &$result) + { + return $this->checkForMagicNumbers($content, strlen($content), $this->magicXMLOrHTML, $result); + } + + private function sniffForOfficeDocs($content, $filename, &$result) + { + $officeVersion = ""; + if (!$this->checkForMagicNumbers($content, strlen($content), $this->officeMagicNumbers, $officeVersion)) { + + $_ = ""; + if ($this->checkForMagicNumbers($content, strlen($content), [new MagicNumber("application/xml", "officeExtensionTypes as $officeExtensionType) { + $extension = substr($filename, strlen($filename) - $officeExtensionType->getExtensionLength()); + if (strcmp(strtolower($extension), $officeExtensionType->getExtension()) === 0) { + $type = $officeExtensionType->getDocType(); + break; + } + } + + if ($type === OfficeDocType::NONE) { + return false; + } + if ((strcmp($officeVersion, "CFB") === 0)||(strcmp($officeVersion, "XML") === 0)) { + switch ($type) { + case OfficeDocType::WORD: + $result = "application/msword"; + return true; + case OfficeDocType::EXCEL: + $result = "application/vnd.ms-excel"; + return true; + case OfficeDocType::POWERPOINT: + $result = "application/vnd.ms-powerpoint"; + return true; + default: + return false; + } + } else if (strcmp($officeVersion, "OOXML") === 0) { + switch ($type) { + case OfficeDocType::WORD: + $result = "application/vnd.openxmlformats-officedocument." . + "wordprocessingml.document"; + return true; + case OfficeDocType::EXCEL: + $result = "application/vnd.openxmlformats-officedocument." . + "spreadsheetml.sheet"; + return true; + case OfficeDocType::POWERPOINT: + $result = "application/vnd.openxmlformats-officedocument." . + "presentationml.presentation"; + return true; + default: + return false; + } + } + return false; + } + + +} \ No newline at end of file diff --git a/src/OfficeDocType.php b/src/OfficeDocType.php new file mode 100644 index 0000000..dbae7a1 --- /dev/null +++ b/src/OfficeDocType.php @@ -0,0 +1,18 @@ +docType = $docType; + $this->extension = $extension; + $this->extensionLength = strlen($extension); + } + + /** + * @return int + */ + public function getDocType() + { + return $this->docType; + } + + /** + * @return string + */ + public function getExtension() + { + return $this->extension; + } + + /** + * @return int + */ + public function getExtensionLength() + { + return $this->extensionLength; + } +} \ No newline at end of file