This commit is contained in:
ivan deng
2023-07-21 18:56:03 +08:00
parent 35b7dbb4b7
commit 82a6525b55
44 changed files with 10368 additions and 0 deletions

View File

@@ -0,0 +1 @@
VERSION = (0, 10, 0)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,544 @@
# Copyright 2009 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the 'License')
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Python implementation of the UA parser."""
from __future__ import absolute_import
import os
import re
__author__ = "Lindsey Simon <elsigh@gmail.com>"
class UserAgentParser(object):
def __init__(
self, pattern, family_replacement=None, v1_replacement=None, v2_replacement=None
):
"""Initialize UserAgentParser.
Args:
pattern: a regular expression string
family_replacement: a string to override the matched family (optional)
v1_replacement: a string to override the matched v1 (optional)
v2_replacement: a string to override the matched v2 (optional)
"""
self.pattern = pattern
self.user_agent_re = re.compile(self.pattern)
self.family_replacement = family_replacement
self.v1_replacement = v1_replacement
self.v2_replacement = v2_replacement
def MatchSpans(self, user_agent_string):
match_spans = []
match = self.user_agent_re.search(user_agent_string)
if match:
match_spans = [
match.span(group_index) for group_index in range(1, match.lastindex + 1)
]
return match_spans
def Parse(self, user_agent_string):
family, v1, v2, v3 = None, None, None, None
match = self.user_agent_re.search(user_agent_string)
if match:
if self.family_replacement:
if re.search(r"\$1", self.family_replacement):
family = re.sub(r"\$1", match.group(1), self.family_replacement)
else:
family = self.family_replacement
else:
family = match.group(1)
if self.v1_replacement:
v1 = self.v1_replacement
elif match.lastindex and match.lastindex >= 2:
v1 = match.group(2) or None
if self.v2_replacement:
v2 = self.v2_replacement
elif match.lastindex and match.lastindex >= 3:
v2 = match.group(3) or None
if match.lastindex and match.lastindex >= 4:
v3 = match.group(4) or None
return family, v1, v2, v3
class OSParser(object):
def __init__(
self,
pattern,
os_replacement=None,
os_v1_replacement=None,
os_v2_replacement=None,
os_v3_replacement=None,
os_v4_replacement=None,
):
"""Initialize UserAgentParser.
Args:
pattern: a regular expression string
os_replacement: a string to override the matched os (optional)
os_v1_replacement: a string to override the matched v1 (optional)
os_v2_replacement: a string to override the matched v2 (optional)
os_v3_replacement: a string to override the matched v3 (optional)
os_v4_replacement: a string to override the matched v4 (optional)
"""
self.pattern = pattern
self.user_agent_re = re.compile(self.pattern)
self.os_replacement = os_replacement
self.os_v1_replacement = os_v1_replacement
self.os_v2_replacement = os_v2_replacement
self.os_v3_replacement = os_v3_replacement
self.os_v4_replacement = os_v4_replacement
def MatchSpans(self, user_agent_string):
match_spans = []
match = self.user_agent_re.search(user_agent_string)
if match:
match_spans = [
match.span(group_index) for group_index in range(1, match.lastindex + 1)
]
return match_spans
def Parse(self, user_agent_string):
os, os_v1, os_v2, os_v3, os_v4 = None, None, None, None, None
match = self.user_agent_re.search(user_agent_string)
if match:
if self.os_replacement:
os = MultiReplace(self.os_replacement, match)
elif match.lastindex:
os = match.group(1)
if self.os_v1_replacement:
os_v1 = MultiReplace(self.os_v1_replacement, match)
elif match.lastindex and match.lastindex >= 2:
os_v1 = match.group(2)
if self.os_v2_replacement:
os_v2 = MultiReplace(self.os_v2_replacement, match)
elif match.lastindex and match.lastindex >= 3:
os_v2 = match.group(3)
if self.os_v3_replacement:
os_v3 = MultiReplace(self.os_v3_replacement, match)
elif match.lastindex and match.lastindex >= 4:
os_v3 = match.group(4)
if self.os_v4_replacement:
os_v4 = MultiReplace(self.os_v4_replacement, match)
elif match.lastindex and match.lastindex >= 5:
os_v4 = match.group(5)
return os, os_v1, os_v2, os_v3, os_v4
def MultiReplace(string, match):
def _repl(m):
index = int(m.group(1)) - 1
group = match.groups()
if index < len(group):
return group[index]
return ""
_string = re.sub(r"\$(\d)", _repl, string)
_string = re.sub(r"^\s+|\s+$", "", _string)
if _string == "":
return None
return _string
class DeviceParser(object):
def __init__(
self,
pattern,
regex_flag=None,
device_replacement=None,
brand_replacement=None,
model_replacement=None,
):
"""Initialize UserAgentParser.
Args:
pattern: a regular expression string
device_replacement: a string to override the matched device (optional)
"""
self.pattern = pattern
if regex_flag == "i":
self.user_agent_re = re.compile(self.pattern, re.IGNORECASE)
else:
self.user_agent_re = re.compile(self.pattern)
self.device_replacement = device_replacement
self.brand_replacement = brand_replacement
self.model_replacement = model_replacement
def MatchSpans(self, user_agent_string):
match_spans = []
match = self.user_agent_re.search(user_agent_string)
if match:
match_spans = [
match.span(group_index) for group_index in range(1, match.lastindex + 1)
]
return match_spans
def Parse(self, user_agent_string):
device, brand, model = None, None, None
match = self.user_agent_re.search(user_agent_string)
if match:
if self.device_replacement:
device = MultiReplace(self.device_replacement, match)
else:
device = match.group(1)
if self.brand_replacement:
brand = MultiReplace(self.brand_replacement, match)
if self.model_replacement:
model = MultiReplace(self.model_replacement, match)
elif len(match.groups()) > 0:
model = match.group(1)
return device, brand, model
MAX_CACHE_SIZE = 20
_parse_cache = {}
def Parse(user_agent_string, **jsParseBits):
""" Parse all the things
Args:
user_agent_string: the full user agent string
jsParseBits: javascript override bits
Returns:
A dictionary containing all parsed bits
"""
jsParseBits = jsParseBits or {}
key = (user_agent_string, repr(jsParseBits))
cached = _parse_cache.get(key)
if cached is not None:
return cached
if len(_parse_cache) > MAX_CACHE_SIZE:
_parse_cache.clear()
v = {
"user_agent": ParseUserAgent(user_agent_string, **jsParseBits),
"os": ParseOS(user_agent_string, **jsParseBits),
"device": ParseDevice(user_agent_string, **jsParseBits),
"string": user_agent_string,
}
_parse_cache[key] = v
return v
def ParseUserAgent(user_agent_string, **jsParseBits):
""" Parses the user-agent string for user agent (browser) info.
Args:
user_agent_string: The full user-agent string.
jsParseBits: javascript override bits.
Returns:
A dictionary containing parsed bits.
"""
if (
"js_user_agent_family" in jsParseBits
and jsParseBits["js_user_agent_family"] != ""
):
family = jsParseBits["js_user_agent_family"]
v1 = jsParseBits.get("js_user_agent_v1") or None
v2 = jsParseBits.get("js_user_agent_v2") or None
v3 = jsParseBits.get("js_user_agent_v3") or None
else:
for uaParser in USER_AGENT_PARSERS:
family, v1, v2, v3 = uaParser.Parse(user_agent_string)
if family:
break
# Override for Chrome Frame IFF Chrome is enabled.
if "js_user_agent_string" in jsParseBits:
js_user_agent_string = jsParseBits["js_user_agent_string"]
if (
js_user_agent_string
and js_user_agent_string.find("Chrome/") > -1
and user_agent_string.find("chromeframe") > -1
):
jsOverride = {}
jsOverride = ParseUserAgent(js_user_agent_string)
family = "Chrome Frame (%s %s)" % (family, v1)
v1 = jsOverride["major"]
v2 = jsOverride["minor"]
v3 = jsOverride["patch"]
family = family or "Other"
return {
"family": family,
"major": v1 or None,
"minor": v2 or None,
"patch": v3 or None,
}
def ParseOS(user_agent_string, **jsParseBits):
""" Parses the user-agent string for operating system info
Args:
user_agent_string: The full user-agent string.
jsParseBits: javascript override bits.
Returns:
A dictionary containing parsed bits.
"""
for osParser in OS_PARSERS:
os, os_v1, os_v2, os_v3, os_v4 = osParser.Parse(user_agent_string)
if os:
break
os = os or "Other"
return {
"family": os,
"major": os_v1,
"minor": os_v2,
"patch": os_v3,
"patch_minor": os_v4,
}
def ParseDevice(user_agent_string):
""" Parses the user-agent string for device info.
Args:
user_agent_string: The full user-agent string.
ua_family: The parsed user agent family name.
Returns:
A dictionary containing parsed bits.
"""
for deviceParser in DEVICE_PARSERS:
device, brand, model = deviceParser.Parse(user_agent_string)
if device:
break
if device is None:
device = "Other"
return {"family": device, "brand": brand, "model": model}
def PrettyUserAgent(family, v1=None, v2=None, v3=None):
"""Pretty user agent string."""
if v3:
if v3[0].isdigit():
return "%s %s.%s.%s" % (family, v1, v2, v3)
else:
return "%s %s.%s%s" % (family, v1, v2, v3)
elif v2:
return "%s %s.%s" % (family, v1, v2)
elif v1:
return "%s %s" % (family, v1)
return family
def PrettyOS(os, os_v1=None, os_v2=None, os_v3=None, os_v4=None):
"""Pretty os string."""
if os_v4:
return "%s %s.%s.%s.%s" % (os, os_v1, os_v2, os_v3, os_v4)
if os_v3:
if os_v3[0].isdigit():
return "%s %s.%s.%s" % (os, os_v1, os_v2, os_v3)
else:
return "%s %s.%s%s" % (os, os_v1, os_v2, os_v3)
elif os_v2:
return "%s %s.%s" % (os, os_v1, os_v2)
elif os_v1:
return "%s %s" % (os, os_v1)
return os
def ParseWithJSOverrides(
user_agent_string,
js_user_agent_string=None,
js_user_agent_family=None,
js_user_agent_v1=None,
js_user_agent_v2=None,
js_user_agent_v3=None,
):
""" backwards compatible. use one of the other Parse methods instead! """
# Override via JS properties.
if js_user_agent_family is not None and js_user_agent_family != "":
family = js_user_agent_family
v1 = None
v2 = None
v3 = None
if js_user_agent_v1 is not None:
v1 = js_user_agent_v1
if js_user_agent_v2 is not None:
v2 = js_user_agent_v2
if js_user_agent_v3 is not None:
v3 = js_user_agent_v3
else:
for parser in USER_AGENT_PARSERS:
family, v1, v2, v3 = parser.Parse(user_agent_string)
if family:
break
# Override for Chrome Frame IFF Chrome is enabled.
if (
js_user_agent_string
and js_user_agent_string.find("Chrome/") > -1
and user_agent_string.find("chromeframe") > -1
):
family = "Chrome Frame (%s %s)" % (family, v1)
ua_dict = ParseUserAgent(js_user_agent_string)
v1 = ua_dict["major"]
v2 = ua_dict["minor"]
v3 = ua_dict["patch"]
return family or "Other", v1, v2, v3
def Pretty(family, v1=None, v2=None, v3=None):
""" backwards compatible. use PrettyUserAgent instead! """
if v3:
if v3[0].isdigit():
return "%s %s.%s.%s" % (family, v1, v2, v3)
else:
return "%s %s.%s%s" % (family, v1, v2, v3)
elif v2:
return "%s %s.%s" % (family, v1, v2)
elif v1:
return "%s %s" % (family, v1)
return family
def GetFilters(
user_agent_string,
js_user_agent_string=None,
js_user_agent_family=None,
js_user_agent_v1=None,
js_user_agent_v2=None,
js_user_agent_v3=None,
):
"""Return the optional arguments that should be saved and used to query.
js_user_agent_string is always returned if it is present. We really only need
it for Chrome Frame. However, I added it in the generally case to find other
cases when it is different. When the recording of js_user_agent_string was
added, we created new records for all new user agents.
Since we only added js_document_mode for the IE 9 preview case, it did not
cause new user agent records the way js_user_agent_string did.
js_document_mode has since been removed in favor of individual property
overrides.
Args:
user_agent_string: The full user-agent string.
js_user_agent_string: JavaScript ua string from client-side
js_user_agent_family: This is an override for the family name to deal
with the fact that IE platform preview (for instance) cannot be
distinguished by user_agent_string, but only in javascript.
js_user_agent_v1: v1 override - see above.
js_user_agent_v2: v1 override - see above.
js_user_agent_v3: v1 override - see above.
Returns:
{js_user_agent_string: '[...]', js_family_name: '[...]', etc...}
"""
filters = {}
filterdict = {
"js_user_agent_string": js_user_agent_string,
"js_user_agent_family": js_user_agent_family,
"js_user_agent_v1": js_user_agent_v1,
"js_user_agent_v2": js_user_agent_v2,
"js_user_agent_v3": js_user_agent_v3,
}
for key, value in filterdict.items():
if value is not None and value != "":
filters[key] = value
return filters
# Build the list of user agent parsers from YAML
UA_PARSER_YAML = os.environ.get("UA_PARSER_YAML")
if UA_PARSER_YAML:
# This will raise an ImportError if missing, obviously since it's no
# longer a requirement
import yaml
try:
# Try and use libyaml bindings if available since faster
from yaml import CSafeLoader as SafeLoader
except ImportError:
from yaml import SafeLoader
with open(UA_PARSER_YAML) as fp:
regexes = yaml.load(fp, Loader=SafeLoader)
USER_AGENT_PARSERS = []
for _ua_parser in regexes["user_agent_parsers"]:
_regex = _ua_parser["regex"]
_family_replacement = _ua_parser.get("family_replacement")
_v1_replacement = _ua_parser.get("v1_replacement")
_v2_replacement = _ua_parser.get("v2_replacement")
USER_AGENT_PARSERS.append(
UserAgentParser(
_regex, _family_replacement, _v1_replacement, _v2_replacement
)
)
OS_PARSERS = []
for _os_parser in regexes["os_parsers"]:
_regex = _os_parser["regex"]
_os_replacement = _os_parser.get("os_replacement")
_os_v1_replacement = _os_parser.get("os_v1_replacement")
_os_v2_replacement = _os_parser.get("os_v2_replacement")
_os_v3_replacement = _os_parser.get("os_v3_replacement")
_os_v4_replacement = _os_parser.get("os_v4_replacement")
OS_PARSERS.append(
OSParser(
_regex,
_os_replacement,
_os_v1_replacement,
_os_v2_replacement,
_os_v3_replacement,
_os_v4_replacement,
)
)
DEVICE_PARSERS = []
for _device_parser in regexes["device_parsers"]:
_regex = _device_parser["regex"]
_regex_flag = _device_parser.get("regex_flag")
_device_replacement = _device_parser.get("device_replacement")
_brand_replacement = _device_parser.get("brand_replacement")
_model_replacement = _device_parser.get("model_replacement")
DEVICE_PARSERS.append(
DeviceParser(
_regex,
_regex_flag,
_device_replacement,
_brand_replacement,
_model_replacement,
)
)
# Clean our our temporary vars explicitly
# so they can't be reused or imported
del regexes
del yaml
del SafeLoader
else:
# Just load our pre-compiled versions
from ._regexes import USER_AGENT_PARSERS, DEVICE_PARSERS, OS_PARSERS

View File

@@ -0,0 +1,290 @@
#!/usr/bin/python2.5
#
# Copyright 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the 'License')
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""User Agent Parser Unit Tests.
Run:
# python -m user_agent_parser_test (runs all the tests, takes awhile)
or like:
# python -m user_agent_parser_test ParseTest.testBrowserscopeStrings
"""
from __future__ import unicode_literals, absolute_import
__author__ = "slamm@google.com (Stephen Lamm)"
import os
import re
import unittest
import yaml
try:
# Try and use libyaml bindings if available since faster
from yaml import CSafeLoader as SafeLoader
except ImportError:
from yaml import SafeLoader
from ua_parser import user_agent_parser
TEST_RESOURCES_DIR = os.path.join(
os.path.abspath(os.path.dirname(__file__)), "../uap-core"
)
class ParseTest(unittest.TestCase):
def testBrowserscopeStrings(self):
self.runUserAgentTestsFromYAML(
os.path.join(TEST_RESOURCES_DIR, "tests/test_ua.yaml")
)
def testBrowserscopeStringsOS(self):
self.runOSTestsFromYAML(os.path.join(TEST_RESOURCES_DIR, "tests/test_os.yaml"))
def testStringsOS(self):
self.runOSTestsFromYAML(
os.path.join(TEST_RESOURCES_DIR, "test_resources/additional_os_tests.yaml")
)
def testStringsDevice(self):
self.runDeviceTestsFromYAML(
os.path.join(TEST_RESOURCES_DIR, "tests/test_device.yaml")
)
def testMozillaStrings(self):
self.runUserAgentTestsFromYAML(
os.path.join(
TEST_RESOURCES_DIR, "test_resources/firefox_user_agent_strings.yaml"
)
)
# NOTE: The YAML file used here is one output by makePGTSComparisonYAML()
# below, as opposed to the pgts_browser_list-orig.yaml file. The -orig
# file is by no means perfect, but identifies many browsers that we
# classify as "Other". This test itself is mostly useful to know when
# somthing in UA parsing changes. An effort should be made to try and
# reconcile the differences between the two YAML files.
def testPGTSStrings(self):
self.runUserAgentTestsFromYAML(
os.path.join(TEST_RESOURCES_DIR, "test_resources/pgts_browser_list.yaml")
)
def testParseAll(self):
user_agent_string = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; fr; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5,gzip(gfe),gzip(gfe)"
expected = {
"device": {"family": "Mac", "brand": "Apple", "model": "Mac"},
"os": {
"family": "Mac OS X",
"major": "10",
"minor": "4",
"patch": None,
"patch_minor": None,
},
"user_agent": {
"family": "Firefox",
"major": "3",
"minor": "5",
"patch": "5",
},
"string": user_agent_string,
}
result = user_agent_parser.Parse(user_agent_string)
self.assertEqual(
result,
expected,
"UA: {0}\n expected<{1}> != actual<{2}>".format(
user_agent_string, expected, result
),
)
# Make a YAML file for manual comparsion with pgts_browser_list-orig.yaml
def makePGTSComparisonYAML(self):
import codecs
outfile = codecs.open("outfile.yaml", "w", "utf-8")
print >> outfile, "test_cases:"
yamlFile = open(os.path.join(TEST_RESOURCES_DIR, "pgts_browser_list.yaml"))
yamlContents = yaml.load(yamlFile, Loader=SafeLoader)
yamlFile.close()
for test_case in yamlContents["test_cases"]:
user_agent_string = test_case["user_agent_string"]
kwds = {}
if "js_ua" in test_case:
kwds = eval(test_case["js_ua"])
(family, major, minor, patch) = user_agent_parser.ParseUserAgent(
user_agent_string, **kwds
)
# Escape any double-quotes in the UA string
user_agent_string = re.sub(r'"', '\\"', user_agent_string)
print >> outfile, ' - user_agent_string: "' + user_agent_string + '"' + "\n" + ' family: "' + family + '"\n' + " major: " + (
"" if (major is None) else "'" + major + "'"
) + "\n" + " minor: " + (
"" if (minor is None) else "'" + minor + "'"
) + "\n" + " patch: " + (
"" if (patch is None) else "'" + patch + "'"
)
outfile.close()
# Run a set of test cases from a YAML file
def runUserAgentTestsFromYAML(self, file_name):
yamlFile = open(os.path.join(TEST_RESOURCES_DIR, file_name))
yamlContents = yaml.load(yamlFile, Loader=SafeLoader)
yamlFile.close()
for test_case in yamlContents["test_cases"]:
# Inputs to Parse()
user_agent_string = test_case["user_agent_string"]
kwds = {}
if "js_ua" in test_case:
kwds = eval(test_case["js_ua"])
# The expected results
expected = {
"family": test_case["family"],
"major": test_case["major"],
"minor": test_case["minor"],
"patch": test_case["patch"],
}
result = {}
result = user_agent_parser.ParseUserAgent(user_agent_string, **kwds)
self.assertEqual(
result,
expected,
"UA: {0}\n expected<{1}, {2}, {3}, {4}> != actual<{5}, {6}, {7}, {8}>".format(
user_agent_string,
expected["family"],
expected["major"],
expected["minor"],
expected["patch"],
result["family"],
result["major"],
result["minor"],
result["patch"],
),
)
def runOSTestsFromYAML(self, file_name):
yamlFile = open(os.path.join(TEST_RESOURCES_DIR, file_name))
yamlContents = yaml.load(yamlFile, Loader=SafeLoader)
yamlFile.close()
for test_case in yamlContents["test_cases"]:
# Inputs to Parse()
user_agent_string = test_case["user_agent_string"]
kwds = {}
if "js_ua" in test_case:
kwds = eval(test_case["js_ua"])
# The expected results
expected = {
"family": test_case["family"],
"major": test_case["major"],
"minor": test_case["minor"],
"patch": test_case["patch"],
"patch_minor": test_case["patch_minor"],
}
result = user_agent_parser.ParseOS(user_agent_string, **kwds)
self.assertEqual(
result,
expected,
"UA: {0}\n expected<{1} {2} {3} {4} {5}> != actual<{6} {7} {8} {9} {10}>".format(
user_agent_string,
expected["family"],
expected["major"],
expected["minor"],
expected["patch"],
expected["patch_minor"],
result["family"],
result["major"],
result["minor"],
result["patch"],
result["patch_minor"],
),
)
def runDeviceTestsFromYAML(self, file_name):
yamlFile = open(os.path.join(TEST_RESOURCES_DIR, file_name))
yamlContents = yaml.load(yamlFile, Loader=SafeLoader)
yamlFile.close()
for test_case in yamlContents["test_cases"]:
# Inputs to Parse()
user_agent_string = test_case["user_agent_string"]
kwds = {}
if "js_ua" in test_case:
kwds = eval(test_case["js_ua"])
# The expected results
expected = {
"family": test_case["family"],
"brand": test_case["brand"],
"model": test_case["model"],
}
result = user_agent_parser.ParseDevice(user_agent_string, **kwds)
self.assertEqual(
result,
expected,
"UA: {0}\n expected<{1} {2} {3}> != actual<{4} {5} {6}>".format(
user_agent_string,
expected["family"],
expected["brand"],
expected["model"],
result["family"],
result["brand"],
result["model"],
),
)
class GetFiltersTest(unittest.TestCase):
def testGetFiltersNoMatchesGiveEmptyDict(self):
user_agent_string = "foo"
filters = user_agent_parser.GetFilters(
user_agent_string, js_user_agent_string=None
)
self.assertEqual({}, filters)
def testGetFiltersJsUaPassedThrough(self):
user_agent_string = "foo"
filters = user_agent_parser.GetFilters(
user_agent_string, js_user_agent_string="bar"
)
self.assertEqual({"js_user_agent_string": "bar"}, filters)
def testGetFiltersJsUserAgentFamilyAndVersions(self):
user_agent_string = (
"Mozilla/4.0 (compatible; MSIE 8.0; "
"Windows NT 5.1; Trident/4.0; GTB6; .NET CLR 2.0.50727; "
".NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)"
)
filters = user_agent_parser.GetFilters(
user_agent_string, js_user_agent_string="bar", js_user_agent_family="foo"
)
self.assertEqual(
{"js_user_agent_string": "bar", "js_user_agent_family": "foo"}, filters
)
if __name__ == "__main__":
unittest.main()