# Copyright 2014 Joe Gregorio
#
# Licensed under the MIT License

"""MIME-Type Parser

This module provides basic functions for handling mime-types. It can handle
matching mime-types against a list of media-ranges. See section 14.1 of the
HTTP specification [RFC 2616] for a complete explanation.

   http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1

Contents:
 - parse_mime_type():   Parses a mime-type into its component parts.
 - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q'
                          quality parameter.
 - fitness_and_quality_parsed(): Fitness and quality ('q') of a mime-type
                          against a list of pre-parsed media-ranges.
 - quality():           Determines the quality ('q') of a mime-type when
                          compared against a list of media-ranges.
 - quality_parsed():    Just like quality() except the second parameter must be
                          pre-parsed.
 - best_match():        Choose the mime-type with the highest quality ('q')
                          from a list of candidates.
"""
from __future__ import absolute_import
# 'reduce' is no longer used in this module; the import is kept so that code
# importing it from here keeps working.
from functools import reduce  # noqa: F401

__version__ = "0.1.3"
__author__ = "Joe Gregorio"
# NOTE(review): this value looks like an e-mail address that was obfuscated by
# a scraping/anti-spam filter -- confirm against the upstream project.
__email__ = "[email protected]"
__license__ = "MIT License"
__credits__ = ""


def parse_mime_type(mime_type):
    """Parses a mime-type into its component parts.

    Carves up a mime-type and returns a tuple of the (type, subtype, params)
    where 'params' is a dictionary of all the parameters for the media range.
    For example, the media range 'application/xhtml;q=0.5' would get parsed
    into:

       ('application', 'xhtml', {'q': '0.5'})

    Parameter segments that contain no '=' (including the empty segment left
    by a trailing ';') are ignored.

    Raises:
      ValueError: if the part before the first ';' is not of the form
        'type/subtype' (a lone '*' is accepted and treated as '*/*').
    """
    parts = mime_type.split(";")
    params = {}
    for segment in parts[1:]:
        key, sep, value = segment.partition("=")
        if not sep:
            # No '=' at all: a trailing ';' or a bare token. There is no
            # value to record, so skip it instead of crashing.
            continue
        params[key.strip()] = value.strip()

    full_type = parts[0].strip()
    # Java URLConnection class sends an Accept header that includes a
    # single '*'. Turn it into a legal wildcard.
    if full_type == "*":
        full_type = "*/*"

    pieces = full_type.split("/")
    if len(pieces) != 2:
        raise ValueError("malformed mime type: %r" % (mime_type,))
    main_type, subtype = pieces

    return (main_type.strip(), subtype.strip(), params)


def parse_media_range(range):
    """Parse a media-range into its component parts.

    Carves up a media range and returns a tuple of the (type, subtype,
    params) where 'params' is a dictionary of all the parameters for the media
    range.  For example, the media range 'application/*;q=0.5' would get parsed
    into:

       ('application', '*', {'q': '0.5'})

    In addition this function also guarantees that there is a value for 'q'
    in the params dictionary, filling it in with a proper default if
    necessary: a 'q' that is missing, empty, not a number, or outside the
    interval [0, 1] is replaced by '1'. An explicit 'q=0' is preserved, since
    it marks the range as not acceptable.
    """
    (main_type, subtype, params) = parse_mime_type(range)
    try:
        q_value = float(params["q"])
    except (KeyError, ValueError):
        q_value = None
    # The chained comparison is also False for NaN, which therefore falls
    # back to the default as well.
    if q_value is None or not 0 <= q_value <= 1:
        params["q"] = "1"

    return (main_type, subtype, params)


def fitness_and_quality_parsed(mime_type, parsed_ranges):
    """Find the best match for a mime-type amongst parsed media-ranges.

    Find the best match for a given mime-type against a list of media_ranges
    that have already been parsed by parse_media_range(). Returns a tuple of
    the fitness value and the value of the 'q' quality parameter of the best
    match, or (-1, 0) if no match was found. Just as for quality_parsed(),
    'parsed_ranges' must be a list of parsed media ranges.

    Fitness is 100 for an exact type match, plus 10 for an exact subtype
    match, plus 1 for every non-'q' parameter of 'mime_type' that the range
    repeats with the same value. On equal fitness the earliest range wins.
    """
    best_fitness = -1
    best_fit_q = 0
    (target_type, target_subtype, target_params) = parse_media_range(mime_type)
    for (range_type, range_subtype, range_params) in parsed_ranges:
        type_match = (
            range_type == target_type or range_type == "*" or target_type == "*"
        )
        subtype_match = (
            range_subtype == target_subtype
            or range_subtype == "*"
            or target_subtype == "*"
        )
        if not (type_match and subtype_match):
            continue

        param_matches = sum(
            1
            for (key, value) in target_params.items()
            if key != "q" and key in range_params and value == range_params[key]
        )
        fitness = 100 if range_type == target_type else 0
        fitness += 10 if range_subtype == target_subtype else 0
        fitness += param_matches
        if fitness > best_fitness:
            best_fitness = fitness
            best_fit_q = range_params["q"]

    return best_fitness, float(best_fit_q)


def quality_parsed(mime_type, parsed_ranges):
    """Find the best match for a mime-type amongst parsed media-ranges.

    Find the best match for a given mime-type against a list of media_ranges
    that have already been parsed by parse_media_range(). Returns the 'q'
    quality parameter of the best match, 0 if no match was found. This function
    behaves the same as quality() except that 'parsed_ranges' must be a list of
    parsed media ranges.
    """

    return fitness_and_quality_parsed(mime_type, parsed_ranges)[1]


def quality(mime_type, ranges):
    """Return the quality ('q') of a mime-type against a list of media-ranges.

    Returns the quality 'q' of a mime-type when compared against the
    media-ranges in ranges. Blank entries in 'ranges' (for example the one
    produced by a trailing comma) are ignored. For example:

    >>> quality('text/html', 'text/*;q=0.3, text/html;q=0.7, '
    ...         'text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5')
    0.7

    """
    parsed_ranges = [
        parse_media_range(r) for r in _filter_blank(ranges.split(","))
    ]

    return quality_parsed(mime_type, parsed_ranges)


def best_match(supported, header):
    """Return mime-type with the highest quality ('q') from list of candidates.

    Takes a list of supported mime-types and finds the best match for all the
    media-ranges listed in header. The value of header must be a string that
    conforms to the format of the HTTP Accept: header. The value of 'supported'
    is a list of mime-types. The list of supported mime-types should be sorted
    in order of increasing desirability, in case of a situation where there is
    a tie.

    Candidates are ranked by quality first, then by how specifically they
    were matched, then by their position in 'supported'. Returns '' when
    'supported' is empty or when no candidate has a quality greater than 0.

    >>> best_match(['application/xbel+xml', 'text/xml'],
    ...            'text/*;q=0.5,*/*; q=0.1')
    'text/xml'
    """
    parsed_header = [
        parse_media_range(r) for r in _filter_blank(header.split(","))
    ]
    candidates = []
    for pos, mime_type in enumerate(supported):
        fitness, q_value = fitness_and_quality_parsed(mime_type, parsed_header)
        # 'pos' is unique, so tuple comparison never reaches 'mime_type'.
        candidates.append((q_value, fitness, pos, mime_type))
    if not candidates:
        return ""

    best_q, _fitness, _pos, best_type = max(candidates)

    return best_type if best_q else ""


def _filter_blank(i):
    """Yield the items of 'i' that are not empty or whitespace-only."""
    return (s for s in i if s.strip())