Package madrona :: Package common :: Package uaparser :: Module parser
[hide private]

Source Code for Module madrona.common.uaparser.parser

  1  #!/usr/bin/python2.4 
  2  # 
  3  # Copyright 2009 Google Inc. 
  4  # 
  5  # Licensed under the Apache License, Version 2.0 (the "License"); 
  6  # you may not use this file except in compliance with the License. 
  7  # You may obtain a copy of the License at 
  8  # 
  9  #     http://www.apache.org/licenses/LICENSE-2.0 
 10  # 
 11  # Unless required by applicable law or agreed to in writing, software 
 12  # distributed under the License is distributed on an "AS IS" BASIS, 
 13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 14  # See the License for the specific language governing permissions and 
 15  # limitations under the License. 
 16   
 17  """Parser.""" 
 18   
 19  import re 
 20  import regexes 
class UserAgent(object):
    """A user agent string resolved to a browser family and version parts.

    Instances expose ``user_agent_string``, ``js_user_agent_string``,
    ``family`` (``'Other'`` when no parser produced a family) and the
    version components ``v1``, ``v2`` and ``v3`` (each may be ``None``).
    """

    def __init__(self, user_agent_string, js_user_agent_string=None):
        """Parse the given user agent string(s).

        Args:
          user_agent_string: the user agent string to parse.
          js_user_agent_string: optional second user agent string. When it
              is given and ``user_agent_string`` contains 'chromeframe',
              the family becomes 'Chrome Frame (<family> <v1>)' and the
              version parts are taken from this string instead.
        """
        self.user_agent_string = user_agent_string
        self.js_user_agent_string = js_user_agent_string

        family, v1, v2, v3 = self._parse_string(user_agent_string)

        if js_user_agent_string and 'chromeframe' in user_agent_string:
            # The family label keeps the host browser's family/v1; the
            # version numbers come from the JS-reported user agent.
            family = 'Chrome Frame (%s %s)' % (family, v1)
            _, v1, v2, v3 = self._parse_string(js_user_agent_string)

        self.family = family or 'Other'
        self.v1 = v1
        self.v2 = v2
        self.v3 = v3

    @staticmethod
    def _parse_string(user_agent_string):
        """Run the parsers in regexes.USER_AGENT_PARSERS over a string.

        Returns:
          (family, v1, v2, v3) from the first parser whose Parse() yields
          a truthy family, otherwise the result of the last parser tried;
          all ``None`` when the parser list is empty.
        """
        family, v1, v2, v3 = None, None, None, None
        for ua_parser in regexes.USER_AGENT_PARSERS:
            family, v1, v2, v3 = ua_parser.Parse(user_agent_string)
            if family:
                break
        return family, v1, v2, v3

    def pretty(self):
        """Return this user agent as a pretty string, e.g. 'Chrome 4.0.203'."""
        return UserAgent.pretty_print(self.family, self.v1, self.v2, self.v3)

    @staticmethod
    def parse_pretty(pretty_string):
        """Parse a user agent pretty (e.g. 'Chrome 4.0.203') to parts.

        Args:
          pretty_string: a user agent pretty string (e.g. 'Chrome 4.0.203')
        Returns:
          (family, v1, v2, v3) e.g. ('Chrome', '4', '0', '203'). Parts that
          are not present are None.
        """
        v1, v2, v3 = None, None, None
        family, _, version_str = pretty_string.rpartition(' ')
        if not family:
            # No space at all: the whole string is the family.
            return version_str, v1, v2, v3

        version_bits = version_str.split('.')
        v1 = version_bits.pop(0)
        if not v1.isdigit():
            # The last word is not a version (e.g. 'Chrome Frame').
            return pretty_string, None, v2, v3

        if version_bits:
            v2 = version_bits.pop(0)
            if not v2:
                # Trailing dot (e.g. 'Chrome 4.'): there is no minor version.
                v2 = None
            elif not v2.isdigit():
                # Something like '0b5': split into digits '0' and suffix 'b5'.
                nondigit_index = min(
                    i for i, c in enumerate(v2) if not c.isdigit())
                v2, v3 = v2[:nondigit_index], v2[nondigit_index:]
            elif version_bits:
                v3 = version_bits.pop(0)
        return family, v1, v2, v3

    @staticmethod
    def pretty_print(family, v1=None, v2=None, v3=None):
        """Pretty browser string.

        Joins the family and the version parts that are set. A v3 that
        starts with a digit is appended with a '.', otherwise it is
        appended directly to v2 (e.g. 'Firefox 3.0b5').
        """
        if v3:
            if v3[0].isdigit():
                return '%s %s.%s.%s' % (family, v1, v2, v3)
            return '%s %s.%s%s' % (family, v1, v2, v3)
        if v2:
            return '%s %s.%s' % (family, v1, v2)
        if v1:
            return '%s %s' % (family, v1)
        return family
84
class UserAgentParser(object):
    """Matches one regular expression against user agent strings.

    Group 1 of the pattern is the browser family; groups 2, 3 and 4, when
    present, are the version parts v1, v2 and v3.
    """

    def __init__(self, pattern, family_replacement=None, v1_replacement=None):
        """Initialize UserAgentParser.

        Args:
          pattern: a regular expression string
          family_replacement: a string to override the matched family
              (optional); every '$1' in it is replaced by the text of
              group 1
          v1_replacement: a string to override the matched v1 (optional)
        """
        self.pattern = pattern
        self.user_agent_re = re.compile(self.pattern)
        self.family_replacement = family_replacement
        self.v1_replacement = v1_replacement

    def MatchSpans(self, user_agent_string):
        """Return the (start, end) spans of groups 1..lastindex of a match.

        Returns an empty list when the pattern does not match or when no
        group took part in the match.
        """
        match = self.user_agent_re.search(user_agent_string)
        if not match:
            return []
        # lastindex is None when no group participated in the match.
        last_group = match.lastindex or 0
        return [match.span(group_index)
                for group_index in range(1, last_group + 1)]

    def Parse(self, user_agent_string):
        """Parse a user agent string into (family, v1, v2, v3).

        Returns:
          A 4-tuple; every element is None when the pattern does not
          match, and version parts without a corresponding group are None.
        """
        family, v1, v2, v3 = None, None, None, None
        match = self.user_agent_re.search(user_agent_string)
        if not match:
            return family, v1, v2, v3

        if self.family_replacement:
            if '$1' in self.family_replacement:
                # Plain string substitution: the matched text must be
                # inserted literally, not interpreted as a regex
                # replacement template (backslashes in a user agent would
                # otherwise be treated as escapes / group references).
                family = self.family_replacement.replace(
                    '$1', match.group(1))
            else:
                family = self.family_replacement
        else:
            family = match.group(1)

        # lastindex is None when no group participated in the match.
        last_group = match.lastindex or 0
        if self.v1_replacement:
            v1 = self.v1_replacement
        elif last_group >= 2:
            v1 = match.group(2)
        if last_group >= 3:
            v2 = match.group(3)
            if last_group >= 4:
                v3 = match.group(4)
        return family, v1, v2, v3
128