From d45cd4ea62c0aaca2c55e773ab4fdaa9e2ed8961 Mon Sep 17 00:00:00 2001
From: chenc <1458513@qq.com>
Date: Fri, 12 May 2023 09:07:47 +0800
Subject: [PATCH] init code
---
.idea/.gitignore | 8 +
.idea/mime-type-sniffer.iml | 12 +
.idea/misc.xml | 6 +
.idea/modules.xml | 8 +
.idea/php.xml | 20 ++
.idea/vcs.xml | 6 +
composer.json | 24 ++
src/MagicNumber.php | 99 +++++++++
src/MimeTypeSniffer.php | 426 ++++++++++++++++++++++++++++++++++++
src/OfficeDocType.php | 18 ++
src/OfficeExtensionType.php | 65 ++++++
11 files changed, 692 insertions(+)
create mode 100644 .idea/.gitignore
create mode 100644 .idea/mime-type-sniffer.iml
create mode 100644 .idea/misc.xml
create mode 100644 .idea/modules.xml
create mode 100644 .idea/php.xml
create mode 100644 .idea/vcs.xml
create mode 100644 composer.json
create mode 100644 src/MagicNumber.php
create mode 100644 src/MimeTypeSniffer.php
create mode 100644 src/OfficeDocType.php
create mode 100644 src/OfficeExtensionType.php
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..35410ca
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/mime-type-sniffer.iml b/.idea/mime-type-sniffer.iml
new file mode 100644
index 0000000..4980e63
--- /dev/null
+++ b/.idea/mime-type-sniffer.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..3ce3588
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..4159e82
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/php.xml b/.idea/php.xml
new file mode 100644
index 0000000..6f95ace
--- /dev/null
+++ b/.idea/php.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..c8397c9
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..388f0d0
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,24 @@
+{
+ "name": "aix/mime-type-sniffer",
+ "description": "jsaix mime-type-sniffer",
+ "version": "1.0.0",
+ "type": "libs",
+ "license": "JSAIX LICENSE",
+ "authors": [
+ {
+ "name": "chenc",
+ "email": "1458513@qq.com"
+ }
+ ],
+ "require": {
+ "php": ">=7.3.0"
+ },
+ "autoload": {
+ "classmap": [
+ "src/"
+ ],
+ "psr-4": {
+ "MimeTypeSniffer\\": "src/"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/MagicNumber.php b/src/MagicNumber.php
new file mode 100644
index 0000000..44f20c0
--- /dev/null
+++ b/src/MagicNumber.php
@@ -0,0 +1,99 @@
+mimeType = $mimeType;
+ $this->magic = $magic;
+ $this->magicLength = strlen($magic);
+ $this->isString = $isString;
+ if ($mask && (strlen($mask) !== strlen($magic))) {
+ throw new \RuntimeException("magic and mask sizes must be equal");
+ }
+ $this->mask = $mask;
+ }
+
+ /**
+ * Returns the type label stored by the constructor in the mimeType property.
+ *
+ * MimeTypeSniffer::matchMagicNumber() copies this value into its result
+ * argument when the signature matches.
+ *
+ * @return string
+ */
+ public function getMimeType()
+ {
+ return $this->mimeType;
+ }
+
+ /**
+ * Returns the signature bytes exactly as they were given to the constructor.
+ *
+ * @return string
+ */
+ public function getMagic()
+ {
+ return $this->magic;
+ }
+
+ /**
+ * Returns the isString flag given to the constructor.
+ *
+ * MimeTypeSniffer::matchMagicNumber() uses it to choose between a
+ * lower-cased prefix comparison (true) and a byte-wise comparison (false).
+ *
+ * @return bool
+ */
+ public function isString()
+ {
+ return $this->isString;
+ }
+
+ /**
+ * Returns the mask given to the constructor.
+ *
+ * The constructor only validates the mask when it is truthy, and callers
+ * test this value for truthiness, so it may be a falsy value when no mask
+ * was supplied (presumably an empty string or null; confirm against the
+ * constructor signature). A truthy mask has the same byte length as the
+ * magic.
+ *
+ * @return string
+ */
+ public function getMask()
+ {
+ return $this->mask;
+ }
+
+ /**
+ * Returns the byte length of the magic, computed once with strlen() in the
+ * constructor.
+ *
+ * @return int
+ */
+ public function getMagicLength()
+ {
+ return $this->magicLength;
+ }
+
+
+}
\ No newline at end of file
diff --git a/src/MimeTypeSniffer.php b/src/MimeTypeSniffer.php
new file mode 100644
index 0000000..c9702bc
--- /dev/null
+++ b/src/MimeTypeSniffer.php
@@ -0,0 +1,426 @@
+magicNumbers = [
+ new MagicNumber("application/pdf", "%PDF-"),
+ new MagicNumber("application/postscript", "%!PS-Adobe-"),
+ new MagicNumber("image/gif", "GIF87a"),
+ new MagicNumber("image/gif", "GIF89a"),
+ new MagicNumber("image/png", "\x89" . "PNG\x0D\x0A\x1A\x0A"),
+ new MagicNumber("image/jpeg", "\xFF\xD8\xFF"),
+ new MagicNumber("image/bmp", "BM"),
+ // Source: Mozilla
+ new MagicNumber("text/plain", "#!"), // Script
+ new MagicNumber("text/plain", "%!"), // Script, similar to PS
+ new MagicNumber("text/plain", "From"),
+ new MagicNumber("text/plain", ">From"),
+ // Chrome specific
+ new MagicNumber("application/x-gzip", "\x1F\x8B\x08"),
+ new MagicNumber("audio/x-pn-realaudio", "\x2E\x52\x4D\x46"),
+ new MagicNumber("video/x-ms-asf",
+ "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C"),
+ new MagicNumber("image/tiff", "I I"),
+ new MagicNumber("image/tiff", "II*"),
+ new MagicNumber("image/tiff", "MM\x00*"),
+ new MagicNumber("audio/mpeg", "ID3"),
+ new MagicNumber("image/webp", "RIFF....WEBPVP"),
+ new MagicNumber("video/webm", "\x1A\x45\xDF\xA3"),
+ new MagicNumber("application/zip", "PK\x03\x04"),
+ new MagicNumber("application/x-rar-compressed", "Rar!\x1A\x07\x00"),
+ new MagicNumber("application/x-msmetafile", "\xD7\xCD\xC6\x9A"),
+ new MagicNumber("application/octet-stream", "MZ"),
+
+ new MagicNumber("application/x-chrome-extension", "Cr24\x02\x00\x00\x00"),
+ new MagicNumber("application/x-chrome-extension", "Cr24\x03\x00\x00\x00"),
+
+ // Sniffing for Flash:
+ //
+ // new MagicNumber("application/x-shockwave-flash", "CWS"),
+ // new MagicNumber("application/x-shockwave-flash", "FLV"),
+ // new MagicNumber("application/x-shockwave-flash", "FWS"),
+ //
+ // Including these magic number for Flash is a trade off.
+ //
+ // Pros:
+ // * Flash is an important and popular file format
+ //
+ // Cons:
+ // * These patterns are fairly weak
+ // * If we mistakenly decide something is Flash, we will execute it
+ // in the origin of an unsuspecting site. This could be a security
+ // vulnerability if the site allows users to upload content.
+ //
+ // On balance, we do not include these patterns.
+ ];
+
+
+ $this->officeMagicNumbers = [
+ new MagicNumber("CFB", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"),
+ new MagicNumber("OOXML", "PK\x03\x04"),
+ ];
+
+
+ $this->magicXMLOrHTML = [
+ new MagicNumber("application/atom+xml", "magicXMLOrHTML[] = new MagicNumber("text/html", "<" . $htmlTag, true);
+ }
+
+
+ $this->extraMagicNumbers = [
+ new MagicNumber("image/x-xbitmap", "#define"),
+ new MagicNumber("image/x-icon", "\x00\x00\x01\x00"),
+ new MagicNumber("image/svg+xml", "officeExtensionTypes = [
+ new OfficeExtensionType(OfficeDocType::WORD, ".doc"),
+ new OfficeExtensionType(OfficeDocType::EXCEL, ".xls"),
+ new OfficeExtensionType(OfficeDocType::POWERPOINT, ".ppt"),
+ new OfficeExtensionType(OfficeDocType::WORD, ".docx"),
+ new OfficeExtensionType(OfficeDocType::EXCEL, ".xlsx"),
+ new OfficeExtensionType(OfficeDocType::POWERPOINT, ".pptx"),
+ ];
+
+ }
+
+ /**
+ * MimeTypeSniffer constructor.
+ *
+ * Takes no arguments; all setup is delegated to initialize().
+ */
+ public function __construct()
+ {
+ $this->initialize();
+ }
+
+
+    /**
+     * Compares the first $len bytes of $content with $magic, position by position.
+     *
+     * A "." byte in $magic acts as a wildcard and accepts any content byte at
+     * the same position.
+     *
+     * @param string $magic   signature bytes, "." meaning "any byte"
+     * @param string $content bytes under test
+     * @param int $len        number of leading positions to compare
+     * @return bool true when no compared position mismatches
+     */
+    private function magicCmp($magic, $content, $len)
+    {
+        $wildcard = ord(".");
+        for ($offset = 0, $remaining = $len; $remaining; $offset++, $remaining--) {
+            $expected = ord(substr($magic, $offset, 1));
+            $actual = ord(substr($content, $offset, 1));
+            if ($expected === $wildcard || $expected === $actual) {
+                continue;
+            }
+            return false;
+        }
+        return true;
+    }
+
+
+    /**
+     * Masked variant of the signature comparison.
+     *
+     * For each of the first $len positions the content byte is AND-ed with the
+     * mask byte at that position and the outcome must equal the magic byte.
+     * A "." byte in $magic accepts anything at that position.
+     *
+     * @param string $magic   signature bytes, "." meaning "any byte"
+     * @param string $content bytes under test
+     * @param int $len        number of leading positions to compare
+     * @param string $mask    per-position bit mask applied to $content
+     * @return bool true when no compared position mismatches
+     */
+    private function magicMaskCmp($magic, $content, $len, $mask)
+    {
+        $wildcard = ord(".");
+        for ($offset = 0, $remaining = $len; $remaining; $offset++, $remaining--) {
+            $expected = ord(substr($magic, $offset, 1));
+            $actual = ord(substr($content, $offset, 1));
+            $maskByte = ord(substr($mask, $offset, 1));
+            if ($expected === $wildcard || $expected === ($maskByte & $actual)) {
+                continue;
+            }
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Tests whether $content begins with the signature held by $magicNumber.
+     *
+     * String signatures (isString() is true) are matched against the
+     * lower-cased prefix of $content. Only $content is lower-cased, so a string
+     * signature has to be lower-case itself to ever match.
+     *
+     * Binary signatures are matched byte-wise, through magicMaskCmp() when the
+     * entry carries a mask and magicCmp() otherwise. They need at least
+     * getMagicLength() bytes of content; content that is exactly as long as the
+     * signature is accepted.
+     *
+     * @param string $content         leading bytes of the file
+     * @param int $size               number of bytes available in $content
+     * @param MagicNumber $magicNumber signature to test
+     * @param string $result          set to the entry's type label on a match, untouched otherwise
+     * @return bool true on a match
+     */
+    private function matchMagicNumber($content, $size, $magicNumber, &$result)
+    {
+        $len = $magicNumber->getMagicLength();
+        $match = false;
+
+        if ($magicNumber->isString()) {
+            $match = strcmp(strtolower(substr($content, 0, $len)), $magicNumber->getMagic()) === 0;
+        } elseif ($size >= $len) {
+            // ">=" rather than ">": a buffer holding only the signature itself
+            // is still a full match.
+            $mask = $magicNumber->getMask();
+            if ($mask) {
+                $match = $this->magicMaskCmp($magicNumber->getMagic(), $content, $len, $mask);
+            } else {
+                $match = $this->magicCmp($magicNumber->getMagic(), $content, $len);
+            }
+        }
+
+        if ($match) {
+            $result = $magicNumber->getMimeType();
+            return true;
+        }
+
+        return false;
+    }
+
+    /**
+     * Runs $content against a list of signatures, in list order, and stops at
+     * the first one that matches.
+     *
+     * @param string $content             leading bytes of the file
+     * @param int $size                   number of bytes available in $content
+     * @param MagicNumber[] $magicNumbers signatures to try, highest priority first
+     * @param string $result              receives the matching entry's type label
+     * @return bool true when any entry matched
+     */
+    private function checkForMagicNumbers($content, $size, $magicNumbers, &$result)
+    {
+        foreach ($magicNumbers as $candidate) {
+            if (!$this->matchMagicNumber($content, $size, $candidate, $result)) {
+                continue;
+            }
+            return true;
+        }
+        return false;
+    }
+
+
+    /**
+     * Reads the leading bytes of a file so they can be sniffed.
+     *
+     * At most $this->enoughContentByteSize bytes are read from the start of
+     * the file; a shorter file yields a shorter (possibly empty) string.
+     *
+     * @param string $path file to read
+     * @return string the bytes read
+     * @throws \RuntimeException when the file cannot be opened or read
+     */
+    private function getEnoughContent($path)
+    {
+        // "b" keeps the read binary-safe regardless of platform.
+        $handle = fopen($path, "rb");
+        if ($handle === false) {
+            throw new \RuntimeException("unable to open file for sniffing: " . $path);
+        }
+
+        $content = fread($handle, $this->enoughContentByteSize);
+        fclose($handle);
+
+        if ($content === false) {
+            throw new \RuntimeException("unable to read file for sniffing: " . $path);
+        }
+        return $content;
+    }
+
+
+    /**
+     * Sniffs the type of the file at $path from its leading bytes.
+     *
+     * $result starts as "application/unknown" and is overwritten by the first
+     * stage that recognises the content. Stages run in this order: Office
+     * documents, magic numbers, extra magic numbers, XML/HTML, and finally
+     * the binary-versus-text fallback.
+     *
+     * @param string $path     file whose leading bytes are read
+     * @param string $result   receives the sniffed type
+     * @param string $filename file name handed to the Office document stage
+     * @return bool true when a specific stage matched or the content looks
+     *              binary; false when the fallback settled on plain text
+     */
+    public function sniffMimeType($path, &$result, $filename)
+    {
+        $content = $this->getEnoughContent($path);
+        $result = "application/unknown";
+
+        // Short-circuit evaluation keeps the stages in priority order.
+        $recognised = $this->sniffForOfficeDocs($content, $filename, $result)
+            || $this->sniffForMagicNumbers($content, $result)
+            || $this->sniffForExtraMagicNumbers($content, $result)
+            || $this->sniffForXMLOrHTML($content, $result);
+
+        if ($recognised) {
+            return true;
+        }
+        return $this->sniffBinary($content, $result);
+    }
+
+
+    /**
+     * Checks $content against the signatures held in $this->magicNumbers.
+     *
+     * @param string $content leading bytes of the file
+     * @param string $result  receives the matching type
+     * @return bool true when a signature matched
+     */
+    private function sniffForMagicNumbers($content, &$result)
+    {
+        $size = strlen($content);
+        $table = $this->magicNumbers;
+        return $this->checkForMagicNumbers($content, $size, $table, $result);
+    }
+
+    /**
+     * Checks $content against the signatures held in $this->extraMagicNumbers.
+     *
+     * @param string $content leading bytes of the file
+     * @param string $result  receives the matching type
+     * @return bool true when a signature matched
+     */
+    private function sniffForExtraMagicNumbers($content, &$result)
+    {
+        $size = strlen($content);
+        $table = $this->extraMagicNumbers;
+        return $this->checkForMagicNumbers($content, $size, $table, $result);
+    }
+
+    /**
+     * Reports whether $content contains at least one "binary" byte.
+     *
+     * A byte counts as binary when it is below 0x20 and is not TAB, LF, FF,
+     * CR or ESC. The definition follows
+     * https://mimesniff.spec.whatwg.org/#binary-data-byte
+     *
+     * @param string $content bytes to scan
+     * @return bool true at the first binary byte; false when none is found
+     */
+    private function looksLikeBinary($content)
+    {
+        // Bit N set means control byte N is still treated as text.
+        $textControlBits = (1 << 0x09) | (1 << 0x0A) | (1 << 0x0C) | (1 << 0x0D) | (1 << 0x1B);
+
+        $total = strlen($content);
+        for ($pos = 0; $pos < $total; $pos++) {
+            $value = ord(substr($content, $pos, 1));
+            if ($value >= 0x20) {
+                continue;
+            }
+            if (($textControlBits & (1 << $value)) === 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Last-resort classification: decides between plain text and opaque binary.
+     *
+     * Content that starts with a UTF-16BE, UTF-16LE or UTF-8 byte order mark
+     * is reported as "text/plain". Otherwise the content is scanned with
+     * looksLikeBinary(): a hit yields "application/octet-stream", and
+     * anything else is "text/plain".
+     *
+     * @param string $content leading bytes of the file
+     * @param string $result  receives the chosen type
+     * @return bool true only when the content was classified as binary
+     */
+    private function sniffBinary($content, &$result)
+    {
+        $byteOrderMarks = [
+            new MagicNumber("text/plain", "\xFE\xFF"), // UTF-16BE
+            new MagicNumber("text/plain", "\xFF\xFE"), // UTF-16LE
+            new MagicNumber("text/plain", "\xEF\xBB\xBF"), // UTF-8
+        ];
+        $size = strlen($content);
+
+        // A BOM match has already written "text/plain" into $result.
+        if ($this->checkForMagicNumbers($content, $size, $byteOrderMarks, $result)) {
+            return false;
+        }
+
+        if (!$this->looksLikeBinary($content)) {
+            $result = "text/plain";
+            return false;
+        }
+
+        $result = "application/octet-stream";
+        return true;
+    }
+
+
+    /**
+     * Checks $content against the signatures held in $this->magicXMLOrHTML.
+     *
+     * @param string $content leading bytes of the file
+     * @param string $result  receives the matching type
+     * @return bool true when a signature matched
+     */
+    private function sniffForXMLOrHTML($content, &$result)
+    {
+        $size = strlen($content);
+        $table = $this->magicXMLOrHTML;
+        return $this->checkForMagicNumbers($content, $size, $table, $result);
+    }
+
+ private function sniffForOfficeDocs($content, $filename, &$result)
+ {
+ $officeVersion = "";
+ if (!$this->checkForMagicNumbers($content, strlen($content), $this->officeMagicNumbers, $officeVersion)) {
+
+ $_ = "";
+ if ($this->checkForMagicNumbers($content, strlen($content), [new MagicNumber("application/xml", "officeExtensionTypes as $officeExtensionType) {
+ $extension = substr($filename, strlen($filename) - $officeExtensionType->getExtensionLength());
+ if (strcmp(strtolower($extension), $officeExtensionType->getExtension()) === 0) {
+ $type = $officeExtensionType->getDocType();
+ break;
+ }
+ }
+
+ if ($type === OfficeDocType::NONE) {
+ return false;
+ }
+ if ((strcmp($officeVersion, "CFB") === 0)||(strcmp($officeVersion, "XML") === 0)) {
+ switch ($type) {
+ case OfficeDocType::WORD:
+ $result = "application/msword";
+ return true;
+ case OfficeDocType::EXCEL:
+ $result = "application/vnd.ms-excel";
+ return true;
+ case OfficeDocType::POWERPOINT:
+ $result = "application/vnd.ms-powerpoint";
+ return true;
+ default:
+ return false;
+ }
+ } else if (strcmp($officeVersion, "OOXML") === 0) {
+ switch ($type) {
+ case OfficeDocType::WORD:
+ $result = "application/vnd.openxmlformats-officedocument." .
+ "wordprocessingml.document";
+ return true;
+ case OfficeDocType::EXCEL:
+ $result = "application/vnd.openxmlformats-officedocument." .
+ "spreadsheetml.sheet";
+ return true;
+ case OfficeDocType::POWERPOINT:
+ $result = "application/vnd.openxmlformats-officedocument." .
+ "presentationml.presentation";
+ return true;
+ default:
+ return false;
+ }
+ }
+ return false;
+ }
+
+
+}
\ No newline at end of file
diff --git a/src/OfficeDocType.php b/src/OfficeDocType.php
new file mode 100644
index 0000000..dbae7a1
--- /dev/null
+++ b/src/OfficeDocType.php
@@ -0,0 +1,18 @@
+docType = $docType;
+ $this->extension = $extension;
+ $this->extensionLength = strlen($extension);
+ }
+
+ /**
+ * Returns the document type given to the constructor.
+ *
+ * MimeTypeSniffer passes OfficeDocType constants (WORD, EXCEL, POWERPOINT)
+ * for this value.
+ *
+ * @return int
+ */
+ public function getDocType()
+ {
+ return $this->docType;
+ }
+
+ /**
+ * Returns the file extension given to the constructor.
+ *
+ * MimeTypeSniffer registers these with a leading dot and in lower case,
+ * e.g. ".docx".
+ *
+ * @return string
+ */
+ public function getExtension()
+ {
+ return $this->extension;
+ }
+
+ /**
+ * Returns the byte length of the extension, computed once with strlen() in
+ * the constructor.
+ *
+ * @return int
+ */
+ public function getExtensionLength()
+ {
+ return $this->extensionLength;
+ }
+}
\ No newline at end of file