#!/usr/bin/python
#
# Copyright (c) 2016, Alliance for Open Media. All rights reserved.
#
# This source code is subject to the terms of the BSD 2 Clause License and
# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
# was not distributed with this source code in the LICENSE file, you can
# obtain it at www.aomedia.org/license/software. If the Alliance for Open
# Media Patent License 1.0 was not distributed with this source code in the
# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
#

"""Converts video encoding result data from text files to visualization
data source."""

__author__ = ("[email protected] (James Zern),"
              "[email protected] (Jim Bankoski)")

import fnmatch
import math
import os
import re
import sys
import warnings

import numpy as np
import scipy.interpolate

from os.path import basename
from os.path import splitext

# NumPy 2.x moved RankWarning to np.exceptions and renamed trapz to
# trapezoid; resolve whichever names this NumPy provides so the script
# runs on both old and new versions.
try:
  _RANK_WARNING = np.exceptions.RankWarning
except AttributeError:
  _RANK_WARNING = np.RankWarning
try:
  _TRAPZ = np.trapezoid
except AttributeError:
  _TRAPZ = np.trapz

warnings.simplefilter('ignore', _RANK_WARNING)
warnings.simplefilter('ignore', RuntimeWarning)


def bdsnr2(metric_set1, metric_set2):
  """
  BJONTEGAARD Bjontegaard metric calculation adapted
  Bjontegaard's snr metric allows to compute the average % saving in decibels
  between two rate-distortion curves [1]. This is an adaptation of that
  method that fixes inconsistencies when the curve fit operation goes awry
  by replacing the curve fit function with a Piecewise Cubic Hermite
  Interpolating Polynomial and then integrating that by evaluating that
  function at small intervals using the trapezoid method to calculate
  the integral.

  metric_set1 - list of tuples ( bitrate, metric ) for first graph
  metric_set2 - list of tuples ( bitrate, metric ) for second graph

  Returns the average metric (e.g. PSNR, in dB) difference of set 2 over
  set 1 across the overlapping bitrate range, or 0.0 when either set is
  empty, the curves do not overlap, or interpolation fails.

  NOTE: sorts both input lists in place.
  """

  if not metric_set1 or not metric_set2:
    return 0.0

  try:

    # pchip_interpolate requires keys sorted by the x axis.  Here the
    # x axis is the (log) bitrate, so the default tuple sort — which
    # orders by bitrate, the first tuple element — is what we need.
    metric_set1.sort()
    metric_set2.sort()

    # Pull the log of the rate and clamped psnr from metric_sets.
    log_rate1 = [math.log(x[0]) for x in metric_set1]
    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
    log_rate2 = [math.log(x[0]) for x in metric_set2]
    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]

    # Integration interval. This metric only works on the area that's
    # overlapping. Extrapolation of these things is sketchy so we avoid.
    min_int = max(min(log_rate1), min(log_rate2))
    max_int = min(max(log_rate1), max(log_rate2))

    # No overlap means no sensible metric possible.
    if max_int <= min_int:
      return 0.0

    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    # create 100 new samples points separated by interval.
    samples, interval = np.linspace(min_int, max_int, num=100, retstep=True)
    v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
    v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)

    # Calculate the integral using the trapezoid method on the samples.
    int_v1 = _TRAPZ(v1, dx=interval)
    int_v2 = _TRAPZ(v2, dx=interval)

    # Calculate the average improvement.
    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

  except (TypeError, ZeroDivisionError, ValueError, _RANK_WARNING):
    # Malformed/degenerate input: report "no difference" rather than crash.
    return 0.0

  return avg_exp_diff


def bdrate2(metric_set1, metric_set2):
  """
  BJONTEGAARD Bjontegaard metric calculation adapted
  Bjontegaard's metric allows to compute the average % saving in bitrate
  between two rate-distortion curves [1]. This is an adaptation of that
  method that fixes inconsistencies when the curve fit operation goes awry
  by replacing the curve fit function with a Piecewise Cubic Hermite
  Interpolating Polynomial and then integrating that by evaluating that
  function at small intervals using the trapezoid method to calculate
  the integral.

  metric_set1 - list of tuples ( bitrate, metric ) for first graph
  metric_set2 - list of tuples ( bitrate, metric ) for second graph

  Returns the average bitrate saving of set 2 over set 1 as a percentage,
  or 0.0 when either set is empty, the curves do not overlap, or
  interpolation fails.

  NOTE: sorts both input lists in place (by metric).
  """

  if not metric_set1 or not metric_set2:
    return 0.0

  try:

    # pchip_interpolate requires keys sorted by x axis.  The x-axis here
    # is the metric, not the bitrate, so sort by metric.
    metric_set1.sort(key=lambda tup: tup[1])
    metric_set2.sort(key=lambda tup: tup[1])

    # Pull the log of the rate and clamped psnr from metric_sets.
    log_rate1 = [math.log(x[0]) for x in metric_set1]
    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
    log_rate2 = [math.log(x[0]) for x in metric_set2]
    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]

    # Integration interval. This metric only works on the area that's
    # overlapping. Extrapolation of these things is sketchy so we avoid.
    min_int = max(min(metric1), min(metric2))
    max_int = min(max(metric1), max(metric2))

    # No overlap means no sensible metric possible.
    if max_int <= min_int:
      return 0.0

    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    # create 100 new samples points separated by interval.
    samples, interval = np.linspace(min_int, max_int, num=100, retstep=True)
    v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples)
    v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples)

    # Calculate the integral using the trapezoid method on the samples.
    int_v1 = _TRAPZ(v1, dx=interval)
    int_v2 = _TRAPZ(v2, dx=interval)

    # Calculate the average improvement (difference of log rates).
    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

  except (TypeError, ZeroDivisionError, ValueError, _RANK_WARNING):
    # Malformed/degenerate input: report "no difference" rather than crash.
    return 0.0

  # Convert to a percentage.
  avg_diff = (math.exp(avg_exp_diff) - 1) * 100

  return avg_diff


def FillForm(string_for_substitution, dictionary_of_vars):
  """
  This function substitutes all matches of the command string //%% ... %%//
  with the variable represented by ... .

  string_for_substitution - template text containing //%%name%%// markers.
  dictionary_of_vars - maps marker names to their replacement strings;
      raises KeyError if the template references a name not present.
  """
  return_string = string_for_substitution
  for i in re.findall("//%%(.*)%%//", string_for_substitution):
    return_string = re.sub("//%%" + i + "%%//", dictionary_of_vars[i],
                           return_string)
  return return_string


def HasMetrics(line):
  """
  Returns True if the line looks like a metrics (data) row.

  The metrics files produced by aomenc start header lines with a "B"
  (e.g. "Bitrate ..."); data rows start with a number, so a line whose
  first word begins with a digit carries metrics.
  """
  words = line.split()
  # Empty/whitespace-only lines have no words and therefore no metrics.
  return bool(words) and words[0][0:1].isdigit()


def GetMetrics(file_name):
  """Returns the list of column names from the first line of a stats file."""
  with open(file_name, "r") as metric_file:
    return metric_file.readline().split()


def ParseMetricFile(file_name, metric_column):
  """
  Parses a stats file into a sorted list of unique (bitrate, metric) tuples.

  file_name - path to the aomenc stats file.
  metric_column - zero-based index of the metric column to extract;
      out-of-range columns and unparsable values yield a metric of 0.
  """
  metric_set = set()
  with open(file_name, "r") as metric_file:
    for line in metric_file:
      metrics = line.split()
      if HasMetrics(line):
        if metric_column < len(metrics):
          try:
            entry = float(metrics[0]), float(metrics[metric_column])
          except ValueError:
            # Unparsable metric value: record the bitrate with a 0 metric.
            entry = float(metrics[0]), 0
        else:
          entry = float(metrics[0]), 0
        metric_set.add(entry)
  return sorted(metric_set)


def FileBetter(file_name_1, file_name_2, metric_column, method):
  """
  Compares two data files and determines which is better and by how
  much. Also produces a histogram of how much better, by PSNR.
  metric_column is the metric.

  method selects the comparison: 'avg' for the symmetric interpolated
  bitrate difference, 'dsnr' for bdsnr2, anything else for bdrate2.
  """
  # Store and parse our two files into lists of unique tuples.

  # Read the two files, parsing out lines starting with bitrate.
  metric_set1_sorted = ParseMetricFile(file_name_1, metric_column)
  metric_set2_sorted = ParseMetricFile(file_name_2, metric_column)

  def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2):
    """
    Search through the sorted metric file for metrics on either side of
    the metric from file 1. Since both lists are sorted we really
    should not have to search through the entire range, but these
    are small files."""
    total_bitrate_difference_ratio = 0.0
    count = 0
    for bitrate, metric in metric_set1_sorted:
      # A zero bitrate would make the ratio below meaningless.
      if bitrate == 0:
        continue
      for i in range(len(metric_set2_sorted) - 1):
        s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i]
        s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1]
        # We have a point on either side of our metric range.
        if s2_metric_0 < metric <= s2_metric_1:

          # Calculate a slope (bitrate change per unit of metric).
          if s2_metric_1 - s2_metric_0 != 0:
            metric_slope = ((s2_bitrate_1 - s2_bitrate_0) /
                            (s2_metric_1 - s2_metric_0))
          else:
            metric_slope = 0

          # Linearly interpolate the set-2 bitrate at this metric value.
          estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) *
                                  metric_slope)

          if estimated_s2_bitrate == 0:
            continue
          # Calculate percentage difference as given by base.
          if base_is_set_2 == 0:
            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
                                        bitrate)
          else:
            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
                                        estimated_s2_bitrate)

          total_bitrate_difference_ratio += bitrate_difference_ratio
          count += 1
          break

    # Calculate the average improvement between graphs.
    if count != 0:
      avg = total_bitrate_difference_ratio / count
    else:
      avg = 0.0

    return avg

  # Be fair to both graphs by testing all the points in each.
  if method == 'avg':
    avg_improvement = 50 * (
        GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) -
        GraphBetter(metric_set2_sorted, metric_set1_sorted, 0))
  elif method == 'dsnr':
    avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted)
  else:
    avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted)

  return avg_improvement


def HandleFiles(variables):
  """
  This script creates html for displaying metric data produced from data
  in a video stats file, as created by the AOM project when enable_psnr
  is turned on:

  Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]

  The script parses each metrics file [see below] that matches the
  statfile_pattern in the baseline directory and looks for the file that
  matches that same file in each of the sub_dirs, and compares the resultant
  metrics bitrate, avg psnr, glb psnr, and ssim. "

  It provides a table in which each row is a file in the line directory,
  and a column for each subdir, with the cells representing how that clip
  compares to baseline for that subdir. A graph is given for each which
  compares filesize to that metric. If you click on a point in the graph it
  zooms in on that point.

  a SAMPLE metrics file:

  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
   25.911   38.242   38.104   38.258   38.121   75.790     14103
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
   49.982   41.264   41.129   41.255   41.122   83.993     19817
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
   74.967   42.911   42.767   42.899   42.756   87.928     17332
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  100.012   43.983   43.838   43.881   43.738   89.695     25389
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  149.980   45.338   45.203   45.184   45.043   91.591     25438
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  199.852   46.225   46.123   46.113   45.999   92.679     28302
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  249.922   46.864   46.773   46.777   46.673   93.334     27244
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  299.998   47.366   47.281   47.317   47.220   93.844     27137
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  349.769   47.746   47.677   47.722   47.648   94.178     32226
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  399.773   48.032   47.971   48.013   47.946   94.362     36203

  sample use:
  visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
  """
  # gviz_api is only needed to emit the HTML tables/graphs; importing it
  # lazily keeps the metric helpers above usable (and this module
  # importable) when gviz_api is not installed.
  import gviz_api

  # The template file is the html file into which we will write the
  # data from the stats file, formatted correctly for the gviz_api.
  with open(variables[1], "r") as template_file:
    page_template = template_file.read()

  # This is the path match pattern for finding stats files amongst
  # all the other files it could be. eg: *.stt
  file_pattern = variables[2]

  # This is the directory with files that we will use to do the comparison
  # against.
  baseline_dir = variables[3]
  snrs = ''
  filestable = {}

  filestable['dsnr'] = ''
  filestable['drate'] = ''
  filestable['avg'] = ''

  # Dirs is directories after the baseline to compare to the base.
  dirs = variables[4:]

  # Find the metric files in the baseline directory.
  dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))

  metrics = GetMetrics(baseline_dir + "/" + dir_list[0])

  metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'

  for column in range(1, len(metrics)):

    for metric in ['avg', 'dsnr', 'drate']:
      description = {"file": ("string", "File")}

      # Go through each directory and add a column header to our description.
      countoverall = {}
      sumoverall = {}

      for directory in dirs:
        description[directory] = ("number", directory)
        countoverall[directory] = 0
        sumoverall[directory] = 0

      # Data holds the data for the visualization, name given comes from
      # gviz_api sample code.
      data = []
      for filename in dir_list:
        row = {'file': splitext(basename(filename))[0]}
        baseline_file_name = baseline_dir + "/" + filename

        # Read the metric file from each of the directories in our list.
        for directory in dirs:
          metric_file_name = directory + "/" + filename

          # If there is a metric file in the current directory, open it
          # and calculate its overall difference between it and the baseline
          # directory's metric file.
          if os.path.isfile(metric_file_name):
            overall = FileBetter(baseline_file_name, metric_file_name,
                                 column, metric)
            row[directory] = overall

            sumoverall[directory] += overall
            countoverall[directory] += 1

        data.append(row)

      # Add the overall numbers.
      row = {"file": "OVERALL"}
      for directory in dirs:
        # Guard against a directory that contributed no files at all,
        # which would otherwise divide by zero.
        if countoverall[directory]:
          row[directory] = sumoverall[directory] / countoverall[directory]
        else:
          row[directory] = 0
      data.append(row)

      # write the tables out
      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)

      filestable[metric] = (filestable[metric] + "filestable_" + metric +
                            "[" + str(column) + "]=" +
                            data_table.ToJSon(columns_order=["file"] + dirs) +
                            "\n")

    # Referenced by the HTML template through FillForm(..., vars()).
    filestable_avg = filestable['avg']
    filestable_dpsnr = filestable['dsnr']
    filestable_drate = filestable['drate']

    # Now we collect all the data for all the graphs. First the column
    # headers which will be Datarate and then each directory.
    columns = ("datarate", baseline_dir)
    description = {"datarate": ("number", "Datarate")}
    for directory in dirs:
      description[directory] = ("number", directory)

    description[baseline_dir] = ("number", baseline_dir)

    snrs = snrs + "snrs[" + str(column) + "] = ["

    # Now collect the data for the graphs, file by file.
    for filename in dir_list:

      data = []

      # Collect the file in each directory and store all of its metrics
      # in the associated gviz metrics table.
      all_dirs = dirs + [baseline_dir]
      for directory in all_dirs:

        metric_file_name = directory + "/" + filename
        if not os.path.isfile(metric_file_name):
          continue

        # Read and parse the metrics file storing it to the data we'll
        # use for the gviz_api.Datatable.  (Note: this rebinds the outer
        # `metrics` name, as the original script did; the column loop's
        # range was already computed so behavior is unaffected.)
        metrics = ParseMetricFile(metric_file_name, column)
        for bitrate, metric in metrics:
          data.append({"datarate": bitrate, directory: metric})

      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)
      snrs = snrs + "'" + data_table.ToJSon(
          columns_order=tuple(["datarate", baseline_dir] + dirs)) + "',"

    snrs = snrs + "]\n"

  formatters = ""
  for i in range(len(dirs)):
    formatters = "%s formatter.format(better, %d);" % (formatters, i + 1)

  # Substitute every //%%name%%// marker in the template with the local
  # variable of that name and emit the finished page to stdout.
  print(FillForm(page_template, vars()))
  return


if __name__ == "__main__":
  # Guarded so that importing this module has no side effects.
  if len(sys.argv) < 3:
    print(HandleFiles.__doc__)
  else:
    HandleFiles(sys.argv)