1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.0 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Buffer data upload performance tests.
22 *//*--------------------------------------------------------------------*/
23
24 #include "es3pBufferDataUploadTests.hpp"
25 #include "glsCalibration.hpp"
26 #include "tcuTestLog.hpp"
27 #include "tcuVectorUtil.hpp"
28 #include "tcuSurface.hpp"
29 #include "tcuCPUWarmup.hpp"
30 #include "tcuRenderTarget.hpp"
31 #include "gluRenderContext.hpp"
32 #include "gluShaderProgram.hpp"
33 #include "gluStrUtil.hpp"
34 #include "gluPixelTransfer.hpp"
35 #include "gluObjectWrapper.hpp"
36 #include "glwFunctions.hpp"
37 #include "glwEnums.hpp"
38 #include "deClock.h"
39 #include "deMath.h"
40 #include "deStringUtil.hpp"
41 #include "deRandom.hpp"
42 #include "deMemory.h"
43 #include "deThread.h"
44 #include "deMeta.hpp"
45
46 #include <algorithm>
47 #include <iomanip>
48 #include <limits>
49
50 namespace deqp
51 {
52 namespace gles3
53 {
54 namespace Performance
55 {
56 namespace
57 {
58
59 using de::meta::EnableIf;
60 using de::meta::Not;
61 using gls::LineParametersWithConfidence;
62 using gls::theilSenSiegelLinearRegression;
63
// GLSL ES 3.00 vertex shader source that forwards the a_position attribute to gl_Position unchanged.
static const char *const s_minimalVertexShader = "#version 300 es\n"
                                                 "in highp vec4 a_position;\n"
                                                 "void main (void)\n"
                                                 "{\n"
                                                 " gl_Position = a_position;\n"
                                                 "}\n";
70
// GLSL ES 3.00 fragment shader source that writes constant opaque red to output location 0.
// NOTE(review): the identifier is spelled "Fragnent"; it is presumably referenced under this
// spelling elsewhere in the file, so renaming must be done file-wide.
static const char *const s_minimalFragnentShader = "#version 300 es\n"
                                                   "layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
                                                   "void main (void)\n"
                                                   "{\n"
                                                   " dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
                                                   "}\n";
77
// GLSL ES 3.00 vertex shader source that forwards a_position to gl_Position and passes the
// a_color attribute on to the fragment stage as v_color.
static const char *const s_colorVertexShader = "#version 300 es\n"
                                               "in highp vec4 a_position;\n"
                                               "in highp vec4 a_color;\n"
                                               "out highp vec4 v_color;\n"
                                               "void main (void)\n"
                                               "{\n"
                                               " gl_Position = a_position;\n"
                                               " v_color = a_color;\n"
                                               "}\n";
87
// GLSL ES 3.00 fragment shader source that writes the interpolated v_color to output location 0.
static const char *const s_colorFragmentShader = "#version 300 es\n"
                                                 "layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
                                                 "in mediump vec4 v_color;\n"
                                                 "void main (void)\n"
                                                 "{\n"
                                                 " dEQP_FragColor = v_color;\n"
                                                 "}\n";
95
// Timing of one upload sample that is measured as a single operation.
// calculateBasicStatistics() treats fitResponseDuration as microseconds; totalDuration
// presumably uses the same unit.
struct SingleOperationDuration
{
    uint64_t totalDuration;
    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
101
// Timings of one map-based upload sample, split into map, unmap, write and alloc parts.
// calculateBasicStatistics() treats fitResponseDuration as microseconds; the other fields
// presumably use the same unit.
struct MapBufferRangeDuration
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t allocDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
112
// Same layout as MapBufferRangeDuration but without an alloc part.
// calculateBasicStatistics() treats fitResponseDuration as microseconds; the other fields
// presumably use the same unit.
struct MapBufferRangeDurationNoAlloc
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
122
// Timings of one map-based upload sample with a separate flush part, in addition to
// map, unmap, write and alloc.
// calculateBasicStatistics() treats fitResponseDuration as microseconds; the other fields
// presumably use the same unit.
struct MapBufferRangeFlushDuration
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t flushDuration;
    uint64_t allocDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
134
// Same layout as MapBufferRangeFlushDuration but without an alloc part.
// calculateBasicStatistics() treats fitResponseDuration as microseconds; the other fields
// presumably use the same unit.
struct MapBufferRangeFlushDurationNoAlloc
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t flushDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
145
// Timings of one render sample, split into render and read parts.
// calculateBasicStatistics() treats fitResponseDuration as microseconds; the other fields
// presumably use the same unit.
struct RenderReadDuration
{
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t renderReadDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
155
// Field-for-field identical to RenderReadDuration, but a distinct type so that it selects
// its own SampleTypeTraits specialization and calculateSampleStatistics() overload.
struct UnrelatedUploadRenderReadDuration
{
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t renderReadDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
165
// Timings of one render sample that also has an upload part, alongside render and read.
// calculateBasicStatistics() treats fitResponseDuration as microseconds; the other fields
// presumably use the same unit.
struct UploadRenderReadDuration
{
    uint64_t uploadDuration;
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
176
// Field-for-field identical to UploadRenderReadDuration, but a distinct type; its
// SampleTypeTraits specialization sets LOG_UNRELATED_UPLOAD_SIZE to 1 instead of 0.
struct UploadRenderReadDurationWithUnrelatedUploadSize
{
    uint64_t uploadDuration;
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
187
// Timings of one sample with firstRender, upload, secondRender and read parts.
// calculateBasicStatistics() treats fitResponseDuration as microseconds; the other fields
// presumably use the same unit.
struct RenderUploadRenderReadDuration
{
    uint64_t firstRenderDuration;
    uint64_t uploadDuration;
    uint64_t secondRenderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration; // response (y) value used when fitting a line to the samples
};
199
// One upload measurement: the sizes involved plus the timings of type SampleT.
// writtenSize is the predictor (x) used for line fitting and rate statistics of upload samples.
template <typename SampleT>
struct UploadSampleResult
{
    typedef SampleT SampleType; // lets generic code name the duration type as T::SampleType

    int bufferSize;
    int allocatedSize;
    int writtenSize;
    SampleType duration;
};
210
// One render measurement: the data sizes and vertex count involved plus the timings of type SampleT.
// renderDataSize is the predictor (x) used for line fitting and rate statistics of render samples.
template <typename SampleT>
struct RenderSampleResult
{
    typedef SampleT SampleType; // lets generic code name the duration type as T::SampleType

    int uploadedDataSize;
    int renderDataSize;
    int unrelatedDataSize;
    int numVertices;
    SampleT duration;
};
222
// Order statistics of one duration field over a set of samples.
// Values are filled in from sorted durations: front, back, and linearSample() at 0.5, 0.1 and 0.9.
struct SingleOperationStatistics
{
    float minTime;
    float maxTime;
    float medianTime;
    float min2DecileTime; //!< minimum value in the 2nd decile
    float max9DecileTime; //!< maximum value in the 9th decile
};
231
// Statistics for single-operation upload samples; all fields are filled by calculateBasicStatistics().
struct SingleCallStatistics
{
    SingleOperationStatistics result; // order statistics of fitResponseDuration

    float medianRate;         // median of predictor / duration-in-seconds
    float maxDiffTime;        // largest (actual - fitted) duration
    float maxDiff9DecileTime; // (actual - fitted) duration sampled at position 0.9
    float medianDiffTime;     // (actual - fitted) duration sampled at position 0.5

    // Same three values for (actual - fitted) / fitted; a sample counts as 0 when fitted < 1.
    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
245
// Statistics for map-based upload samples: per-part order statistics plus the common
// fit statistics that calculateBasicStatistics() fills in.
struct MapCallStatistics
{
    SingleOperationStatistics map;
    SingleOperationStatistics unmap;
    SingleOperationStatistics write;
    SingleOperationStatistics alloc; // not filled by the NoAlloc calculateSampleStatistics() overload
    SingleOperationStatistics result; // order statistics of fitResponseDuration

    float medianRate;         // median of predictor / duration-in-seconds
    float maxDiffTime;        // largest (actual - fitted) duration
    float maxDiff9DecileTime; // (actual - fitted) duration sampled at position 0.9
    float medianDiffTime;     // (actual - fitted) duration sampled at position 0.5

    // Same three values for (actual - fitted) / fitted; a sample counts as 0 when fitted < 1.
    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
263
// Statistics for map-based upload samples with a flush part: per-part order statistics plus
// the common fit statistics that calculateBasicStatistics() fills in.
struct MapFlushCallStatistics
{
    SingleOperationStatistics map;
    SingleOperationStatistics unmap;
    SingleOperationStatistics write;
    SingleOperationStatistics flush;
    SingleOperationStatistics alloc; // not filled by the NoAlloc calculateSampleStatistics() overload
    SingleOperationStatistics result; // order statistics of fitResponseDuration

    float medianRate;         // median of predictor / duration-in-seconds
    float maxDiffTime;        // largest (actual - fitted) duration
    float maxDiff9DecileTime; // (actual - fitted) duration sampled at position 0.9
    float medianDiffTime;     // (actual - fitted) duration sampled at position 0.5

    // Same three values for (actual - fitted) / fitted; a sample counts as 0 when fitted < 1.
    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
282
// Statistics for render/read samples: per-part order statistics plus the common fit
// statistics that calculateBasicStatistics() fills in.
struct RenderReadStatistics
{
    SingleOperationStatistics render;
    SingleOperationStatistics read;
    SingleOperationStatistics result; // order statistics of fitResponseDuration
    SingleOperationStatistics total;  // order statistics of totalDuration

    float medianRate;         // median of predictor / duration-in-seconds
    float maxDiffTime;        // largest (actual - fitted) duration
    float maxDiff9DecileTime; // (actual - fitted) duration sampled at position 0.9
    float medianDiffTime;     // (actual - fitted) duration sampled at position 0.5

    // Same three values for (actual - fitted) / fitted; a sample counts as 0 when fitted < 1.
    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
299
// Statistics for upload/render/read samples: per-part order statistics plus the common fit
// statistics that calculateBasicStatistics() fills in.
struct UploadRenderReadStatistics
{
    SingleOperationStatistics upload;
    SingleOperationStatistics render;
    SingleOperationStatistics read;
    SingleOperationStatistics result; // order statistics of fitResponseDuration
    SingleOperationStatistics total;  // order statistics of totalDuration

    float medianRate;         // median of predictor / duration-in-seconds
    float maxDiffTime;        // largest (actual - fitted) duration
    float maxDiff9DecileTime; // (actual - fitted) duration sampled at position 0.9
    float medianDiffTime;     // (actual - fitted) duration sampled at position 0.5

    // Same three values for (actual - fitted) / fitted; a sample counts as 0 when fitted < 1.
    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
317
// Statistics for render/upload/render/read samples: per-part order statistics plus the
// common fit statistics that calculateBasicStatistics() fills in.
struct RenderUploadRenderReadStatistics
{
    SingleOperationStatistics firstRender;
    SingleOperationStatistics upload;
    SingleOperationStatistics secondRender;
    SingleOperationStatistics read;
    SingleOperationStatistics result; // order statistics of fitResponseDuration
    SingleOperationStatistics total;  // order statistics of totalDuration

    float medianRate;         // median of predictor / duration-in-seconds
    float maxDiffTime;        // largest (actual - fitted) duration
    float maxDiff9DecileTime; // (actual - fitted) duration sampled at position 0.9
    float medianDiffTime;     // (actual - fitted) duration sampled at position 0.5

    // Same three values for (actual - fitted) / fitted; a sample counts as 0 when fitted < 1.
    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
336
// Compile-time description of a duration (sample) type. The primary template is intentionally
// empty; every supported duration type provides an explicit specialization with a StatsType
// typedef and a set of 0/1 flag enumerators.
template <typename T>
struct SampleTypeTraits
{
};
341
342 template <>
343 struct SampleTypeTraits<SingleOperationDuration>
344 {
345 typedef SingleCallStatistics StatsType;
346
347 enum
348 {
349 HAS_MAP_STATS = 0
350 };
351 enum
352 {
353 HAS_UNMAP_STATS = 0
354 };
355 enum
356 {
357 HAS_WRITE_STATS = 0
358 };
359 enum
360 {
361 HAS_FLUSH_STATS = 0
362 };
363 enum
364 {
365 HAS_ALLOC_STATS = 0
366 };
367 enum
368 {
369 LOG_CONTRIBUTIONS = 0
370 };
371 };
372
373 template <>
374 struct SampleTypeTraits<MapBufferRangeDuration>
375 {
376 typedef MapCallStatistics StatsType;
377
378 enum
379 {
380 HAS_MAP_STATS = 1
381 };
382 enum
383 {
384 HAS_UNMAP_STATS = 1
385 };
386 enum
387 {
388 HAS_WRITE_STATS = 1
389 };
390 enum
391 {
392 HAS_FLUSH_STATS = 0
393 };
394 enum
395 {
396 HAS_ALLOC_STATS = 1
397 };
398 enum
399 {
400 LOG_CONTRIBUTIONS = 1
401 };
402 };
403
404 template <>
405 struct SampleTypeTraits<MapBufferRangeDurationNoAlloc>
406 {
407 typedef MapCallStatistics StatsType;
408
409 enum
410 {
411 HAS_MAP_STATS = 1
412 };
413 enum
414 {
415 HAS_UNMAP_STATS = 1
416 };
417 enum
418 {
419 HAS_WRITE_STATS = 1
420 };
421 enum
422 {
423 HAS_FLUSH_STATS = 0
424 };
425 enum
426 {
427 HAS_ALLOC_STATS = 0
428 };
429 enum
430 {
431 LOG_CONTRIBUTIONS = 1
432 };
433 };
434
435 template <>
436 struct SampleTypeTraits<MapBufferRangeFlushDuration>
437 {
438 typedef MapFlushCallStatistics StatsType;
439
440 enum
441 {
442 HAS_MAP_STATS = 1
443 };
444 enum
445 {
446 HAS_UNMAP_STATS = 1
447 };
448 enum
449 {
450 HAS_WRITE_STATS = 1
451 };
452 enum
453 {
454 HAS_FLUSH_STATS = 1
455 };
456 enum
457 {
458 HAS_ALLOC_STATS = 1
459 };
460 enum
461 {
462 LOG_CONTRIBUTIONS = 1
463 };
464 };
465
466 template <>
467 struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc>
468 {
469 typedef MapFlushCallStatistics StatsType;
470
471 enum
472 {
473 HAS_MAP_STATS = 1
474 };
475 enum
476 {
477 HAS_UNMAP_STATS = 1
478 };
479 enum
480 {
481 HAS_WRITE_STATS = 1
482 };
483 enum
484 {
485 HAS_FLUSH_STATS = 1
486 };
487 enum
488 {
489 HAS_ALLOC_STATS = 0
490 };
491 enum
492 {
493 LOG_CONTRIBUTIONS = 1
494 };
495 };
496
497 template <>
498 struct SampleTypeTraits<RenderReadDuration>
499 {
500 typedef RenderReadStatistics StatsType;
501
502 enum
503 {
504 HAS_RENDER_STATS = 1
505 };
506 enum
507 {
508 HAS_READ_STATS = 1
509 };
510 enum
511 {
512 HAS_UPLOAD_STATS = 0
513 };
514 enum
515 {
516 HAS_TOTAL_STATS = 1
517 };
518 enum
519 {
520 HAS_FIRST_RENDER_STATS = 0
521 };
522 enum
523 {
524 HAS_SECOND_RENDER_STATS = 0
525 };
526
527 enum
528 {
529 LOG_CONTRIBUTIONS = 1
530 };
531 };
532
533 template <>
534 struct SampleTypeTraits<UnrelatedUploadRenderReadDuration>
535 {
536 typedef RenderReadStatistics StatsType;
537
538 enum
539 {
540 HAS_RENDER_STATS = 1
541 };
542 enum
543 {
544 HAS_READ_STATS = 1
545 };
546 enum
547 {
548 HAS_UPLOAD_STATS = 0
549 };
550 enum
551 {
552 HAS_TOTAL_STATS = 1
553 };
554 enum
555 {
556 HAS_FIRST_RENDER_STATS = 0
557 };
558 enum
559 {
560 HAS_SECOND_RENDER_STATS = 0
561 };
562
563 enum
564 {
565 LOG_CONTRIBUTIONS = 1
566 };
567 };
568
569 template <>
570 struct SampleTypeTraits<UploadRenderReadDuration>
571 {
572 typedef UploadRenderReadStatistics StatsType;
573
574 enum
575 {
576 HAS_RENDER_STATS = 1
577 };
578 enum
579 {
580 HAS_READ_STATS = 1
581 };
582 enum
583 {
584 HAS_UPLOAD_STATS = 1
585 };
586 enum
587 {
588 HAS_TOTAL_STATS = 1
589 };
590 enum
591 {
592 HAS_FIRST_RENDER_STATS = 0
593 };
594 enum
595 {
596 HAS_SECOND_RENDER_STATS = 0
597 };
598
599 enum
600 {
601 LOG_CONTRIBUTIONS = 1
602 };
603 enum
604 {
605 LOG_UNRELATED_UPLOAD_SIZE = 0
606 };
607 };
608
609 template <>
610 struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize>
611 {
612 typedef UploadRenderReadStatistics StatsType;
613
614 enum
615 {
616 HAS_RENDER_STATS = 1
617 };
618 enum
619 {
620 HAS_READ_STATS = 1
621 };
622 enum
623 {
624 HAS_UPLOAD_STATS = 1
625 };
626 enum
627 {
628 HAS_TOTAL_STATS = 1
629 };
630 enum
631 {
632 HAS_FIRST_RENDER_STATS = 0
633 };
634 enum
635 {
636 HAS_SECOND_RENDER_STATS = 0
637 };
638
639 enum
640 {
641 LOG_CONTRIBUTIONS = 1
642 };
643 enum
644 {
645 LOG_UNRELATED_UPLOAD_SIZE = 1
646 };
647 };
648
649 template <>
650 struct SampleTypeTraits<RenderUploadRenderReadDuration>
651 {
652 typedef RenderUploadRenderReadStatistics StatsType;
653
654 enum
655 {
656 HAS_RENDER_STATS = 0
657 };
658 enum
659 {
660 HAS_READ_STATS = 1
661 };
662 enum
663 {
664 HAS_UPLOAD_STATS = 1
665 };
666 enum
667 {
668 HAS_TOTAL_STATS = 1
669 };
670 enum
671 {
672 HAS_FIRST_RENDER_STATS = 1
673 };
674 enum
675 {
676 HAS_SECOND_RENDER_STATS = 1
677 };
678
679 enum
680 {
681 LOG_CONTRIBUTIONS = 1
682 };
683 enum
684 {
685 LOG_UNRELATED_UPLOAD_SIZE = 1
686 };
687 };
688
// Summary transfer rates produced by analysing a set of upload samples.
// NOTE(review): the producer is not visible in this part of the file; the units and the exact
// meaning of "at range" / "at infinity" should be confirmed against it.
struct UploadSampleAnalyzeResult
{
    float transferRateMedian;
    float transferRateAtRange;
    float transferRateAtInfinity;
};
695
// Summary render rates produced by analysing a set of render samples.
// NOTE(review): the producer is not visible in this part of the file; the units and the exact
// meaning of "at range" / "at infinity" should be confirmed against it.
struct RenderSampleAnalyzeResult
{
    float renderRateMedian;
    float renderRateAtRange;
    float renderRateAtInfinity;
};
702
// Dedicated exception type carrying no extra state beyond std::exception.
// Presumably thrown when unmapping a buffer fails; the throw sites are outside this part of the file.
class UnmapFailureError : public std::exception
{
public:
    // The std::exception base is default-constructed implicitly.
    UnmapFailureError(void)
    {
    }
};
710
getHumanReadableByteSize(int numBytes)711 static std::string getHumanReadableByteSize(int numBytes)
712 {
713 std::ostringstream buf;
714
715 if (numBytes < 1024)
716 buf << numBytes << " byte(s)";
717 else if (numBytes < 1024 * 1024)
718 buf << de::floatToString((float)numBytes / 1024.0f, 1) << " KiB";
719 else
720 buf << de::floatToString((float)numBytes / 1024.0f / 1024.0f, 1) << " MiB";
721
722 return buf.str();
723 }
724
medianTimeMemcpy(void * dst,const void * src,int numBytes)725 static uint64_t medianTimeMemcpy(void *dst, const void *src, int numBytes)
726 {
727 // Time used by memcpy is assumed to be asymptotically linear
728
729 // With large numBytes, the probability of context switch or other random
730 // event is high. Apply memcpy in parts and report how much time would
731 // memcpy have used with the median transfer rate.
732
733 // Less than 1MiB, no need to do anything special
734 if (numBytes < 1048576)
735 {
736 uint64_t startTime;
737 uint64_t endTime;
738
739 deYield();
740
741 startTime = deGetMicroseconds();
742 deMemcpy(dst, src, numBytes);
743 endTime = deGetMicroseconds();
744
745 return endTime - startTime;
746 }
747 else
748 {
749 // Do memcpy in multiple parts
750
751 const int numSections = 5;
752 const int sectionAlign = 16;
753
754 int sectionStarts[numSections + 1];
755 int sectionLens[numSections];
756 uint64_t sectionTimes[numSections];
757 uint64_t medianTime;
758 uint64_t bestTime = 0;
759
760 for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
761 sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign);
762 sectionStarts[numSections] = numBytes;
763
764 for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
765 sectionLens[sectionNdx] = sectionStarts[sectionNdx + 1] - sectionStarts[sectionNdx];
766
767 // Memcpy is usually called after mapbuffer range which may take
768 // a lot of time. To prevent power management from kicking in during
769 // copy, warm up more.
770 {
771 deYield();
772 tcu::warmupCPU();
773 deYield();
774 }
775
776 for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
777 {
778 uint64_t startTime;
779 uint64_t endTime;
780
781 startTime = deGetMicroseconds();
782 deMemcpy((uint8_t *)dst + sectionStarts[sectionNdx], (const uint8_t *)src + sectionStarts[sectionNdx],
783 sectionLens[sectionNdx]);
784 endTime = deGetMicroseconds();
785
786 sectionTimes[sectionNdx] = endTime - startTime;
787
788 if (!bestTime || sectionTimes[sectionNdx] < bestTime)
789 bestTime = sectionTimes[sectionNdx];
790
791 // Detect if write takes 50% longer than it should, and warm up if that happened
792 if (sectionNdx != numSections - 1 && (float)sectionTimes[sectionNdx] > 1.5f * (float)bestTime)
793 {
794 deYield();
795 tcu::warmupCPU();
796 deYield();
797 }
798 }
799
800 std::sort(sectionTimes, sectionTimes + numSections);
801
802 if ((numSections % 2) == 0)
803 medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2;
804 else
805 medianTime = sectionTimes[numSections / 2];
806
807 return medianTime * numSections;
808 }
809 }
810
busyworkCalculation(float initial,int workSize)811 static float busyworkCalculation(float initial, int workSize)
812 {
813 float a = initial;
814 int b = 123;
815
816 for (int ndx = 0; ndx < workSize; ++ndx)
817 {
818 a = deFloatCos(a + (float)b);
819 b = (b + 63) % 107 + de::abs((int)(a * 10.0f));
820 }
821
822 return a + (float)b;
823 }
824
busyWait(int microseconds)825 static void busyWait(int microseconds)
826 {
827 const uint64_t maxSingleWaitTime = 1000; // 1ms
828 const uint64_t endTime = deGetMicroseconds() + microseconds;
829 float unused = *tcu::warmupCPUInternal::g_unused.m_v;
830 int workSize = 500;
831
832 // exponentially increase work, cap to 1ms
833 while (deGetMicroseconds() < endTime)
834 {
835 const uint64_t startTime = deGetMicroseconds();
836 uint64_t totalTime;
837
838 unused = busyworkCalculation(unused, workSize);
839
840 totalTime = deGetMicroseconds() - startTime;
841
842 if (totalTime >= maxSingleWaitTime)
843 break;
844 else
845 workSize *= 2;
846 }
847
848 // "wait"
849 while (deGetMicroseconds() < endTime)
850 unused = busyworkCalculation(unused, workSize);
851
852 *tcu::warmupCPUInternal::g_unused.m_v = unused;
853 }
854
855 // Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1]
856 template <typename T>
linearSample(const std::vector<T> & values,float position)857 static float linearSample(const std::vector<T> &values, float position)
858 {
859 DE_ASSERT(position >= 0.0f);
860 DE_ASSERT(position <= 1.0f);
861
862 const float floatNdx = (float)(values.size() - 1) * position;
863 const int lowerNdx = (int)deFloatFloor(floatNdx);
864 const int higherNdx = lowerNdx + 1;
865 const float interpolationFactor = floatNdx - (float)lowerNdx;
866
867 DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size());
868 DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size());
869 DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f);
870
871 return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor);
872 }
873
874 template <typename T>
calculateSingleOperationStatistics(const std::vector<T> & samples,uint64_t T::SampleType::* target)875 SingleOperationStatistics calculateSingleOperationStatistics(const std::vector<T> &samples,
876 uint64_t T::SampleType::*target)
877 {
878 SingleOperationStatistics stats;
879 std::vector<uint64_t> values(samples.size());
880
881 for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
882 values[ndx] = samples[ndx].duration.*target;
883
884 std::sort(values.begin(), values.end());
885
886 stats.minTime = (float)values.front();
887 stats.maxTime = (float)values.back();
888 stats.medianTime = linearSample(values, 0.5f);
889 stats.min2DecileTime = linearSample(values, 0.1f);
890 stats.max9DecileTime = linearSample(values, 0.9f);
891
892 return stats;
893 }
894
895 template <typename StatisticsType, typename SampleType>
calculateBasicStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples,int SampleType::* predictor)896 void calculateBasicStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit,
897 const std::vector<SampleType> &samples, int SampleType::*predictor)
898 {
899 std::vector<uint64_t> values(samples.size());
900
901 for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
902 values[ndx] = samples[ndx].duration.fitResponseDuration;
903
904 // median rate
905 {
906 std::vector<float> processingRates(samples.size());
907
908 for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
909 {
910 const float timeInSeconds = (float)values[ndx] / 1000.0f / 1000.0f;
911 processingRates[ndx] = (float)(samples[ndx].*predictor) / timeInSeconds;
912 }
913
914 std::sort(processingRates.begin(), processingRates.end());
915
916 stats.medianRate = linearSample(processingRates, 0.5f);
917 }
918
919 // results compared to the approximation
920 {
921 std::vector<float> timeDiffs(samples.size());
922
923 for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
924 {
925 const float prediction = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
926 const float actual = (float)values[ndx];
927 timeDiffs[ndx] = actual - prediction;
928 }
929 std::sort(timeDiffs.begin(), timeDiffs.end());
930
931 stats.maxDiffTime = timeDiffs.back();
932 stats.maxDiff9DecileTime = linearSample(timeDiffs, 0.9f);
933 stats.medianDiffTime = linearSample(timeDiffs, 0.5f);
934 }
935
936 // relative comparison to the approximation
937 {
938 std::vector<float> relativeDiffs(samples.size());
939
940 for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
941 {
942 const float prediction = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
943 const float actual = (float)values[ndx];
944
945 // Ignore cases where we predict negative times, or if
946 // ratio would be (nearly) infinite: ignore if predicted
947 // time is less than 1 microsecond
948 if (prediction < 1.0f)
949 relativeDiffs[ndx] = 0.0f;
950 else
951 relativeDiffs[ndx] = (actual - prediction) / prediction;
952 }
953 std::sort(relativeDiffs.begin(), relativeDiffs.end());
954
955 stats.maxRelDiffTime = relativeDiffs.back();
956 stats.max9DecileRelDiffTime = linearSample(relativeDiffs, 0.9f);
957 stats.medianRelDiffTime = linearSample(relativeDiffs, 0.5f);
958 }
959
960 // values calculated using sorted timings
961
962 std::sort(values.begin(), values.end());
963
964 stats.result.minTime = (float)values.front();
965 stats.result.maxTime = (float)values.back();
966 stats.result.medianTime = linearSample(values, 0.5f);
967 stats.result.min2DecileTime = linearSample(values, 0.1f);
968 stats.result.max9DecileTime = linearSample(values, 0.9f);
969 }
970
971 template <typename StatisticsType, typename SampleType>
calculateBasicTransferStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples)972 void calculateBasicTransferStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit,
973 const std::vector<SampleType> &samples)
974 {
975 calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize);
976 }
977
978 template <typename StatisticsType, typename SampleType>
calculateBasicRenderStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples)979 void calculateBasicRenderStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit,
980 const std::vector<SampleType> &samples)
981 {
982 calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize);
983 }
984
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<SingleOperationDuration>> & samples)985 static SingleCallStatistics calculateSampleStatistics(
986 const LineParametersWithConfidence &fit, const std::vector<UploadSampleResult<SingleOperationDuration>> &samples)
987 {
988 SingleCallStatistics stats;
989
990 calculateBasicTransferStatistics(stats, fit, samples);
991
992 return stats;
993 }
994
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeDuration>> & samples)995 static MapCallStatistics calculateSampleStatistics(
996 const LineParametersWithConfidence &fit, const std::vector<UploadSampleResult<MapBufferRangeDuration>> &samples)
997 {
998 MapCallStatistics stats;
999
1000 calculateBasicTransferStatistics(stats, fit, samples);
1001
1002 stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration);
1003 stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration);
1004 stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration);
1005 stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration);
1006
1007 return stats;
1008 }
1009
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> & samples)1010 static MapFlushCallStatistics calculateSampleStatistics(
1011 const LineParametersWithConfidence &fit,
1012 const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> &samples)
1013 {
1014 MapFlushCallStatistics stats;
1015
1016 calculateBasicTransferStatistics(stats, fit, samples);
1017
1018 stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration);
1019 stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration);
1020 stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration);
1021 stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration);
1022 stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration);
1023
1024 return stats;
1025 }
1026
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> & samples)1027 static MapCallStatistics calculateSampleStatistics(
1028 const LineParametersWithConfidence &fit,
1029 const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> &samples)
1030 {
1031 MapCallStatistics stats;
1032
1033 calculateBasicTransferStatistics(stats, fit, samples);
1034
1035 stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration);
1036 stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration);
1037 stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration);
1038
1039 return stats;
1040 }
1041
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> & samples)1042 static MapFlushCallStatistics calculateSampleStatistics(
1043 const LineParametersWithConfidence &fit,
1044 const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> &samples)
1045 {
1046 MapFlushCallStatistics stats;
1047
1048 calculateBasicTransferStatistics(stats, fit, samples);
1049
1050 stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration);
1051 stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration);
1052 stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration);
1053 stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration);
1054
1055 return stats;
1056 }
1057
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<RenderReadDuration>> & samples)1058 static RenderReadStatistics calculateSampleStatistics(
1059 const LineParametersWithConfidence &fit, const std::vector<RenderSampleResult<RenderReadDuration>> &samples)
1060 {
1061 RenderReadStatistics stats;
1062
1063 calculateBasicRenderStatistics(stats, fit, samples);
1064
1065 stats.render = calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration);
1066 stats.read = calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration);
1067 stats.total = calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration);
1068
1069 return stats;
1070 }
1071
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> & samples)1072 static RenderReadStatistics calculateSampleStatistics(
1073 const LineParametersWithConfidence &fit,
1074 const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> &samples)
1075 {
1076 RenderReadStatistics stats;
1077
1078 calculateBasicRenderStatistics(stats, fit, samples);
1079
1080 stats.render = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration);
1081 stats.read = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration);
1082 stats.total = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration);
1083
1084 return stats;
1085 }
1086
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UploadRenderReadDuration>> & samples)1087 static UploadRenderReadStatistics calculateSampleStatistics(
1088 const LineParametersWithConfidence &fit, const std::vector<RenderSampleResult<UploadRenderReadDuration>> &samples)
1089 {
1090 UploadRenderReadStatistics stats;
1091
1092 calculateBasicRenderStatistics(stats, fit, samples);
1093
1094 stats.upload = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration);
1095 stats.render = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration);
1096 stats.read = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration);
1097 stats.total = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration);
1098
1099 return stats;
1100 }
1101
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> & samples)1102 static UploadRenderReadStatistics calculateSampleStatistics(
1103 const LineParametersWithConfidence &fit,
1104 const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> &samples)
1105 {
1106 UploadRenderReadStatistics stats;
1107
1108 calculateBasicRenderStatistics(stats, fit, samples);
1109
1110 stats.upload =
1111 calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration);
1112 stats.render =
1113 calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration);
1114 stats.read =
1115 calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration);
1116 stats.total =
1117 calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration);
1118
1119 return stats;
1120 }
1121
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> & samples)1122 static RenderUploadRenderReadStatistics calculateSampleStatistics(
1123 const LineParametersWithConfidence &fit,
1124 const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> &samples)
1125 {
1126 RenderUploadRenderReadStatistics stats;
1127
1128 calculateBasicRenderStatistics(stats, fit, samples);
1129
1130 stats.firstRender =
1131 calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration);
1132 stats.upload = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration);
1133 stats.secondRender =
1134 calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration);
1135 stats.read = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration);
1136 stats.total = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration);
1137
1138 return stats;
1139 }
1140
1141 template <typename DurationType>
fitLineToSamples(const std::vector<UploadSampleResult<DurationType>> & samples,int beginNdx,int endNdx,int step,uint64_t DurationType::* target=& DurationType::fitResponseDuration)1142 static LineParametersWithConfidence fitLineToSamples(
1143 const std::vector<UploadSampleResult<DurationType>> &samples, int beginNdx, int endNdx, int step,
1144 uint64_t DurationType::*target = &DurationType::fitResponseDuration)
1145 {
1146 std::vector<tcu::Vec2> samplePoints;
1147
1148 for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
1149 {
1150 tcu::Vec2 point;
1151
1152 point.x() = (float)(samples[sampleNdx].writtenSize);
1153 point.y() = (float)(samples[sampleNdx].duration.*target);
1154
1155 samplePoints.push_back(point);
1156 }
1157
1158 return theilSenSiegelLinearRegression(samplePoints, 0.6f);
1159 }
1160
1161 template <typename DurationType>
fitLineToSamples(const std::vector<RenderSampleResult<DurationType>> & samples,int beginNdx,int endNdx,int step,uint64_t DurationType::* target=& DurationType::fitResponseDuration)1162 static LineParametersWithConfidence fitLineToSamples(
1163 const std::vector<RenderSampleResult<DurationType>> &samples, int beginNdx, int endNdx, int step,
1164 uint64_t DurationType::*target = &DurationType::fitResponseDuration)
1165 {
1166 std::vector<tcu::Vec2> samplePoints;
1167
1168 for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
1169 {
1170 tcu::Vec2 point;
1171
1172 point.x() = (float)(samples[sampleNdx].renderDataSize);
1173 point.y() = (float)(samples[sampleNdx].duration.*target);
1174
1175 samplePoints.push_back(point);
1176 }
1177
1178 return theilSenSiegelLinearRegression(samplePoints, 0.6f);
1179 }
1180
1181 template <typename T>
fitLineToSamples(const std::vector<T> & samples,int beginNdx,int endNdx,uint64_t T::SampleType::* target=& T::SampleType::fitResponseDuration)1182 static LineParametersWithConfidence fitLineToSamples(
1183 const std::vector<T> &samples, int beginNdx, int endNdx,
1184 uint64_t T::SampleType::*target = &T::SampleType::fitResponseDuration)
1185 {
1186 return fitLineToSamples(samples, beginNdx, endNdx, 1, target);
1187 }
1188
1189 template <typename T>
fitLineToSamples(const std::vector<T> & samples,uint64_t T::SampleType::* target=& T::SampleType::fitResponseDuration)1190 static LineParametersWithConfidence fitLineToSamples(
1191 const std::vector<T> &samples, uint64_t T::SampleType::*target = &T::SampleType::fitResponseDuration)
1192 {
1193 return fitLineToSamples(samples, 0, (int)samples.size(), target);
1194 }
1195
// Returns the (unsigned) area enclosed between two lines over [xmin, xmax].
//
// Each line is given as y = offset + coefficient * x.
//
// \param xmin             Left edge of the range.
// \param xmax             Right edge of the range.
// \param lineAOffset      Offset of line A.
// \param lineACoefficient Slope of line A.
// \param lineBOffset      Offset of line B.
// \param lineBCoefficient Slope of line B.
// \return Area of the trapezoid between the lines when one line stays above the
//         other, or the summed area of the two triangles when they cross.
static float getAreaBetweenLines(float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset,
                                 float lineBCoefficient)
{
    const auto absValue = [](float v) { return (v < 0.0f) ? (-v) : (v); };

    const float lineAMin      = lineAOffset + lineACoefficient * xmin;
    const float lineAMax      = lineAOffset + lineACoefficient * xmax;
    const float lineBMin      = lineBOffset + lineBCoefficient * xmin;
    const float lineBMax      = lineBOffset + lineBCoefficient * xmax;
    const bool aOverBAtBegin  = (lineAMin > lineBMin);
    const bool aOverBAtEnd    = (lineAMax > lineBMax);
    const float width         = (xmax - xmin);

    if (aOverBAtBegin == aOverBAtEnd)
    {
        // lines do not intersect: trapezoid area = width * gap at the midpoint

        const float midpoint = (xmin + xmax) / 2.0f;

        const float lineAHeight = lineAOffset + lineACoefficient * midpoint;
        const float lineBHeight = lineBOffset + lineBCoefficient * midpoint;

        return width * absValue(lineAHeight - lineBHeight);
    }
    else
    {
        // lines intersect: two triangles meeting at the intersection point.
        //
        // The triangles are similar, so the range width is split between them
        // in proportion to their heights. Expressing the triangle widths this
        // way avoids dividing by the slope difference, which can be tiny (and
        // was previously treated as "no area") even though the gap at the
        // range edges is large when the x range is wide.

        const float leftHeight  = absValue(lineAMin - lineBMin);
        const float rightHeight = absValue(lineAMax - lineBMax);
        const float heightSum   = leftHeight + rightHeight;

        // Degenerate (or NaN) input: nothing sensible to measure.
        if (!(heightSum > 0.0f))
            return 0.0f;

        const float leftWidth  = absValue(width) * (leftHeight / heightSum);
        const float rightWidth = absValue(width) * (rightHeight / heightSum);

        return (0.5f * leftHeight * leftWidth) + (0.5f * rightHeight * rightWidth);
    }
}
1235
1236 template <typename T>
calculateSampleFitLinearity(const std::vector<T> & samples,int T::* predictor)1237 static float calculateSampleFitLinearity(const std::vector<T> &samples, int T::*predictor)
1238 {
1239 // Compare the fitted line of first half of the samples to the fitted line of
1240 // the second half of the samples. Calculate a AABB that fully contains every
1241 // sample's x component and both fit lines in this range. Calculate the ratio
1242 // of the area between the lines and the AABB.
1243
1244 const float epsilon = 1.e-6f;
1245 const int midPoint = (int)samples.size() / 2;
1246 const LineParametersWithConfidence startApproximation =
1247 fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration);
1248 const LineParametersWithConfidence endApproximation =
1249 fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration);
1250
1251 const float aabbMinX = (float)(samples.front().*predictor);
1252 const float aabbMinY = de::min(startApproximation.offset + startApproximation.coefficient * aabbMinX,
1253 endApproximation.offset + endApproximation.coefficient * aabbMinX);
1254 const float aabbMaxX = (float)(samples.back().*predictor);
1255 const float aabbMaxY = de::max(startApproximation.offset + startApproximation.coefficient * aabbMaxX,
1256 endApproximation.offset + endApproximation.coefficient * aabbMaxX);
1257
1258 const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1259 const float areaBetweenLines =
1260 getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient,
1261 endApproximation.offset, endApproximation.coefficient);
1262 const float errorAreaRatio = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1263
1264 return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1265 }
1266
1267 template <typename DurationType>
calculateSampleFitLinearity(const std::vector<UploadSampleResult<DurationType>> & samples)1268 static float calculateSampleFitLinearity(const std::vector<UploadSampleResult<DurationType>> &samples)
1269 {
1270 return calculateSampleFitLinearity(samples, &UploadSampleResult<DurationType>::writtenSize);
1271 }
1272
1273 template <typename DurationType>
calculateSampleFitLinearity(const std::vector<RenderSampleResult<DurationType>> & samples)1274 static float calculateSampleFitLinearity(const std::vector<RenderSampleResult<DurationType>> &samples)
1275 {
1276 return calculateSampleFitLinearity(samples, &RenderSampleResult<DurationType>::renderDataSize);
1277 }
1278
1279 template <typename T>
calculateSampleTemporalStability(const std::vector<T> & samples,int T::* predictor)1280 static float calculateSampleTemporalStability(const std::vector<T> &samples, int T::*predictor)
1281 {
1282 // Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order)
1283 // Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully
1284 // contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between
1285 // the lines and the AABB.
1286
1287 const float epsilon = 1.e-6f;
1288 const LineParametersWithConfidence evenApproximation =
1289 fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1290 const LineParametersWithConfidence oddApproximation =
1291 fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1292
1293 const float aabbMinX = (float)(samples.front().*predictor);
1294 const float aabbMinY = de::min(evenApproximation.offset + evenApproximation.coefficient * aabbMinX,
1295 oddApproximation.offset + oddApproximation.coefficient * aabbMinX);
1296 const float aabbMaxX = (float)(samples.back().*predictor);
1297 const float aabbMaxY = de::max(evenApproximation.offset + evenApproximation.coefficient * aabbMaxX,
1298 oddApproximation.offset + oddApproximation.coefficient * aabbMaxX);
1299
1300 const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1301 const float areaBetweenLines =
1302 getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient,
1303 oddApproximation.offset, oddApproximation.coefficient);
1304 const float errorAreaRatio = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1305
1306 return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1307 }
1308
1309 template <typename DurationType>
calculateSampleTemporalStability(const std::vector<UploadSampleResult<DurationType>> & samples)1310 static float calculateSampleTemporalStability(const std::vector<UploadSampleResult<DurationType>> &samples)
1311 {
1312 return calculateSampleTemporalStability(samples, &UploadSampleResult<DurationType>::writtenSize);
1313 }
1314
1315 template <typename DurationType>
calculateSampleTemporalStability(const std::vector<RenderSampleResult<DurationType>> & samples)1316 static float calculateSampleTemporalStability(const std::vector<RenderSampleResult<DurationType>> &samples)
1317 {
1318 return calculateSampleTemporalStability(samples, &RenderSampleResult<DurationType>::renderDataSize);
1319 }
1320
1321 template <typename DurationType>
bucketizeSamplesUniformly(const std::vector<UploadSampleResult<DurationType>> & samples,std::vector<UploadSampleResult<DurationType>> * buckets,int numBuckets,int & minBufferSize,int & maxBufferSize)1322 static void bucketizeSamplesUniformly(const std::vector<UploadSampleResult<DurationType>> &samples,
1323 std::vector<UploadSampleResult<DurationType>> *buckets, int numBuckets,
1324 int &minBufferSize, int &maxBufferSize)
1325 {
1326 minBufferSize = 0;
1327 maxBufferSize = 0;
1328
1329 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1330 {
1331 DE_ASSERT(samples[sampleNdx].allocatedSize != 0);
1332
1333 if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize)
1334 minBufferSize = samples[sampleNdx].allocatedSize;
1335 if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize)
1336 maxBufferSize = samples[sampleNdx].allocatedSize;
1337 }
1338
1339 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1340 {
1341 const float bucketNdxFloat = (float)(samples[sampleNdx].allocatedSize - minBufferSize) /
1342 (float)(maxBufferSize - minBufferSize) * (float)numBuckets;
1343 const int bucketNdx = de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets - 1);
1344
1345 buckets[bucketNdx].push_back(samples[sampleNdx]);
1346 }
1347 }
1348
1349 template <typename SampleType>
logMapRangeStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1350 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats(
1351 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1352 {
1353 log << tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime)
1354 << tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime)
1355 << tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME,
1356 stats.map.min2DecileTime)
1357 << tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME,
1358 stats.map.max9DecileTime)
1359 << tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1360 }
1361
1362 template <typename SampleType>
logUnmapStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1363 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats(
1364 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1365 {
1366 log << tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime)
1367 << tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime)
1368 << tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime)
1369 << tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime)
1370 << tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1371 }
1372
1373 template <typename SampleType>
logWriteStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1374 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats(
1375 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1376 {
1377 log << tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime)
1378 << tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime)
1379 << tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime)
1380 << tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime)
1381 << tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1382 }
1383
1384 template <typename SampleType>
logFlushStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1385 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats(
1386 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1387 {
1388 log << tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime)
1389 << tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime)
1390 << tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime)
1391 << tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime)
1392 << tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1393 }
1394
1395 template <typename SampleType>
logAllocStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1396 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats(
1397 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1398 {
1399 log << tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime)
1400 << tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime)
1401 << tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime)
1402 << tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime)
1403 << tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1404 }
1405
1406 template <typename SampleType>
logMapRangeStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1407 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapRangeStats(
1408 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1409 {
1410 DE_UNREF(log);
1411 DE_UNREF(stats);
1412 }
1413
1414 template <typename SampleType>
logUnmapStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1415 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapStats(
1416 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1417 {
1418 DE_UNREF(log);
1419 DE_UNREF(stats);
1420 }
1421
1422 template <typename SampleType>
logWriteStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1423 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteStats(
1424 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1425 {
1426 DE_UNREF(log);
1427 DE_UNREF(stats);
1428 }
1429
1430 template <typename SampleType>
logFlushStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1431 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushStats(
1432 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1433 {
1434 DE_UNREF(log);
1435 DE_UNREF(stats);
1436 }
1437
1438 template <typename SampleType>
logAllocStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1439 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocStats(
1440 tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1441 {
1442 DE_UNREF(log);
1443 DE_UNREF(stats);
1444 }
1445
1446 template <typename SampleType>
logMapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1447 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution(
1448 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1449 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1450 {
1451 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::mapDuration);
1452 log << tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1453 contributionFitting.offset)
1454 << tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1455 contributionFitting.coefficient * 1024.0f * 1024.0f)
1456 << tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1457 }
1458
1459 template <typename SampleType>
logUnmapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1460 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution(
1461 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1462 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1463 {
1464 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration);
1465 log << tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1466 contributionFitting.offset)
1467 << tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1468 contributionFitting.coefficient * 1024.0f * 1024.0f)
1469 << tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1470 }
1471
1472 template <typename SampleType>
logWriteContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1473 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution(
1474 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1475 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1476 {
1477 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration);
1478 log << tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1479 contributionFitting.offset)
1480 << tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1481 contributionFitting.coefficient * 1024.0f * 1024.0f)
1482 << tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1483 }
1484
1485 template <typename SampleType>
logFlushContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1486 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution(
1487 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1488 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1489 {
1490 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration);
1491 log << tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1492 contributionFitting.offset)
1493 << tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1494 contributionFitting.coefficient * 1024.0f * 1024.0f)
1495 << tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1496 }
1497
1498 template <typename SampleType>
logAllocContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1499 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution(
1500 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1501 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1502 {
1503 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration);
1504 log << tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1505 contributionFitting.offset)
1506 << tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1507 contributionFitting.coefficient * 1024.0f * 1024.0f)
1508 << tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1509 }
1510
1511 template <typename SampleType>
logRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1512 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution(
1513 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1514 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1515 {
1516 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration);
1517 log << tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1518 contributionFitting.offset)
1519 << tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1520 contributionFitting.coefficient * 1024.0f * 1024.0f)
1521 << tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME,
1522 stats.render.medianTime);
1523 }
1524
1525 template <typename SampleType>
logReadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1526 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution(
1527 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1528 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1529 {
1530 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration);
1531 log << tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1532 contributionFitting.offset)
1533 << tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1534 contributionFitting.coefficient * 1024.0f * 1024.0f)
1535 << tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime);
1536 }
1537
1538 template <typename SampleType>
logUploadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1539 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution(
1540 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1541 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1542 {
1543 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration);
1544 log << tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1545 contributionFitting.offset)
1546 << tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1547 contributionFitting.coefficient * 1024.0f * 1024.0f)
1548 << tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME,
1549 stats.upload.medianTime);
1550 }
1551
1552 template <typename SampleType>
logTotalContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1553 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution(
1554 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1555 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1556 {
1557 const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration);
1558 log << tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1559 contributionFitting.offset)
1560 << tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1561 contributionFitting.coefficient * 1024.0f * 1024.0f)
1562 << tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime);
1563 }
1564
1565 template <typename SampleType>
logFirstRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1566 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution(
1567 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1568 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1569 {
1570 const LineParametersWithConfidence contributionFitting =
1571 fitLineToSamples(samples, &SampleType::firstRenderDuration);
1572 log << tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us",
1573 QP_KEY_TAG_TIME, contributionFitting.offset)
1574 << tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB",
1575 QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1576 << tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME,
1577 stats.firstRender.medianTime);
1578 }
1579
1580 template <typename SampleType>
logSecondRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1581 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution(
1582 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1583 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1584 {
1585 const LineParametersWithConfidence contributionFitting =
1586 fitLineToSamples(samples, &SampleType::secondRenderDuration);
1587 log << tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us",
1588 QP_KEY_TAG_TIME, contributionFitting.offset)
1589 << tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB",
1590 QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1591 << tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME,
1592 stats.secondRender.medianTime);
1593 }
1594
1595 template <typename SampleType>
logMapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1596 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapContribution(
1597 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1598 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1599 {
1600 DE_UNREF(log);
1601 DE_UNREF(samples);
1602 DE_UNREF(stats);
1603 }
1604
1605 template <typename SampleType>
logUnmapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1606 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapContribution(
1607 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1608 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1609 {
1610 DE_UNREF(log);
1611 DE_UNREF(samples);
1612 DE_UNREF(stats);
1613 }
1614
1615 template <typename SampleType>
logWriteContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1616 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteContribution(
1617 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1618 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1619 {
1620 DE_UNREF(log);
1621 DE_UNREF(samples);
1622 DE_UNREF(stats);
1623 }
1624
1625 template <typename SampleType>
logFlushContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1626 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushContribution(
1627 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1628 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1629 {
1630 DE_UNREF(log);
1631 DE_UNREF(samples);
1632 DE_UNREF(stats);
1633 }
1634
1635 template <typename SampleType>
logAllocContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1636 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocContribution(
1637 tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1638 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1639 {
1640 DE_UNREF(log);
1641 DE_UNREF(samples);
1642 DE_UNREF(stats);
1643 }
1644
1645 template <typename SampleType>
logRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1646 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Value>::Type logRenderContribution(
1647 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1648 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1649 {
1650 DE_UNREF(log);
1651 DE_UNREF(samples);
1652 DE_UNREF(stats);
1653 }
1654
1655 template <typename SampleType>
logReadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1656 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_READ_STATS>::Value>::Type logReadContribution(
1657 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1658 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1659 {
1660 DE_UNREF(log);
1661 DE_UNREF(samples);
1662 DE_UNREF(stats);
1663 }
1664
1665 template <typename SampleType>
logUploadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1666 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Value>::Type logUploadContribution(
1667 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1668 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1669 {
1670 DE_UNREF(log);
1671 DE_UNREF(samples);
1672 DE_UNREF(stats);
1673 }
1674
1675 template <typename SampleType>
logTotalContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1676 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Value>::Type logTotalContribution(
1677 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1678 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1679 {
1680 DE_UNREF(log);
1681 DE_UNREF(samples);
1682 DE_UNREF(stats);
1683 }
1684
1685 template <typename SampleType>
logFirstRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1686 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Value>::Type logFirstRenderContribution(
1687 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1688 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1689 {
1690 DE_UNREF(log);
1691 DE_UNREF(samples);
1692 DE_UNREF(stats);
1693 }
1694
1695 template <typename SampleType>
logSecondRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1696 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Value>::Type logSecondRenderContribution(
1697 tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1698 const typename SampleTypeTraits<SampleType>::StatsType &stats)
1699 {
1700 DE_UNREF(log);
1701 DE_UNREF(samples);
1702 DE_UNREF(stats);
1703 }
1704
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<SingleOperationDuration>> & samples)1705 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1706 const std::vector<UploadSampleResult<SingleOperationDuration>> &samples)
1707 {
1708 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1709 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1710 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1711 << tcu::TestLog::ValueInfo("UploadTime", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1712 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1713 << tcu::TestLog::EndSampleInfo;
1714
1715 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1716 {
1717 const float fitResidual =
1718 (float)samples[sampleNdx].duration.fitResponseDuration -
1719 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1720 log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1721 << (int)samples[sampleNdx].duration.totalDuration << fitResidual << tcu::TestLog::EndSample;
1722 }
1723
1724 log << tcu::TestLog::EndSampleList;
1725 }
1726
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeDuration>> & samples)1727 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1728 const std::vector<UploadSampleResult<MapBufferRangeDuration>> &samples)
1729 {
1730 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1731 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1732 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1733 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1734 << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1735 << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1736 << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1737 << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1738 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1739 << tcu::TestLog::EndSampleInfo;
1740
1741 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1742 {
1743 const float fitResidual =
1744 (float)samples[sampleNdx].duration.fitResponseDuration -
1745 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1746 log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1747 << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.allocDuration
1748 << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration
1749 << (int)samples[sampleNdx].duration.writeDuration << fitResidual << tcu::TestLog::EndSample;
1750 }
1751
1752 log << tcu::TestLog::EndSampleList;
1753 }
1754
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> & samples)1755 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1756 const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> &samples)
1757 {
1758 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1759 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1760 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1761 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1762 << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1763 << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1764 << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1765 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1766 << tcu::TestLog::EndSampleInfo;
1767
1768 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1769 {
1770 const float fitResidual =
1771 (float)samples[sampleNdx].duration.fitResponseDuration -
1772 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1773 log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1774 << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.mapDuration
1775 << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration
1776 << fitResidual << tcu::TestLog::EndSample;
1777 }
1778
1779 log << tcu::TestLog::EndSampleList;
1780 }
1781
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> & samples)1782 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1783 const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> &samples)
1784 {
1785 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1786 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1787 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1788 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1789 << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1790 << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1791 << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1792 << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1793 << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1794 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1795 << tcu::TestLog::EndSampleInfo;
1796
1797 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1798 {
1799 const float fitResidual =
1800 (float)samples[sampleNdx].duration.fitResponseDuration -
1801 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1802 log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1803 << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.allocDuration
1804 << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration
1805 << (int)samples[sampleNdx].duration.writeDuration << (int)samples[sampleNdx].duration.flushDuration
1806 << fitResidual << tcu::TestLog::EndSample;
1807 }
1808
1809 log << tcu::TestLog::EndSampleList;
1810 }
1811
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> & samples)1812 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1813 const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> &samples)
1814 {
1815 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1816 << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1817 << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1818 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1819 << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1820 << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1821 << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1822 << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1823 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1824 << tcu::TestLog::EndSampleInfo;
1825
1826 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1827 {
1828 const float fitResidual =
1829 (float)samples[sampleNdx].duration.fitResponseDuration -
1830 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1831 log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1832 << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.mapDuration
1833 << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration
1834 << (int)samples[sampleNdx].duration.flushDuration << fitResidual << tcu::TestLog::EndSample;
1835 }
1836
1837 log << tcu::TestLog::EndSampleList;
1838 }
1839
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<RenderReadDuration>> & samples)1840 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1841 const std::vector<RenderSampleResult<RenderReadDuration>> &samples)
1842 {
1843 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1844 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1845 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1846 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1847 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1848 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1849 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1850 << tcu::TestLog::EndSampleInfo;
1851
1852 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1853 {
1854 const float fitResidual =
1855 (float)samples[sampleNdx].duration.fitResponseDuration -
1856 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1857 log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].numVertices
1858 << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.renderDuration
1859 << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample;
1860 }
1861
1862 log << tcu::TestLog::EndSampleList;
1863 }
1864
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> & samples)1865 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1866 const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> &samples)
1867 {
1868 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1869 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1870 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1871 << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes",
1872 QP_SAMPLE_VALUE_TAG_PREDICTOR)
1873 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1874 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1875 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1876 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1877 << tcu::TestLog::EndSampleInfo;
1878
1879 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1880 {
1881 const float fitResidual =
1882 (float)samples[sampleNdx].duration.fitResponseDuration -
1883 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1884 log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].numVertices
1885 << samples[sampleNdx].unrelatedDataSize << (int)samples[sampleNdx].duration.renderReadDuration
1886 << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration
1887 << fitResidual << tcu::TestLog::EndSample;
1888 }
1889
1890 log << tcu::TestLog::EndSampleList;
1891 }
1892
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UploadRenderReadDuration>> & samples)1893 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1894 const std::vector<RenderSampleResult<UploadRenderReadDuration>> &samples)
1895 {
1896 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1897 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1898 << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1899 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1900 << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1901 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1902 << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1903 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1904 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1905 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1906 << tcu::TestLog::EndSampleInfo;
1907
1908 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1909 {
1910 const float fitResidual =
1911 (float)samples[sampleNdx].duration.fitResponseDuration -
1912 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1913 log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize
1914 << samples[sampleNdx].numVertices << (int)samples[sampleNdx].duration.renderReadDuration
1915 << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.uploadDuration
1916 << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration
1917 << fitResidual << tcu::TestLog::EndSample;
1918 }
1919
1920 log << tcu::TestLog::EndSampleList;
1921 }
1922
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> & samples)1923 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1924 const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> &samples)
1925 {
1926 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1927 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1928 << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1929 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1930 << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes",
1931 QP_SAMPLE_VALUE_TAG_PREDICTOR)
1932 << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1933 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1934 << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1935 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1936 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1937 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1938 << tcu::TestLog::EndSampleInfo;
1939
1940 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1941 {
1942 const float fitResidual =
1943 (float)samples[sampleNdx].duration.fitResponseDuration -
1944 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1945 log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize
1946 << samples[sampleNdx].numVertices << samples[sampleNdx].unrelatedDataSize
1947 << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.totalDuration
1948 << (int)samples[sampleNdx].duration.uploadDuration << (int)samples[sampleNdx].duration.renderDuration
1949 << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample;
1950 }
1951
1952 log << tcu::TestLog::EndSampleList;
1953 }
1954
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> & samples)1955 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1956 const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> &samples)
1957 {
1958 log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1959 << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1960 << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1961 << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1962 << tcu::TestLog::ValueInfo("DrawReadTime", "Second draw call and ReadPixels time", "us",
1963 QP_SAMPLE_VALUE_TAG_RESPONSE)
1964 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1965 << tcu::TestLog::ValueInfo("FirstDrawCallTime", "First draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1966 << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1967 << tcu::TestLog::ValueInfo("SecondDrawCallTime", "Second draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1968 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1969 << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1970 << tcu::TestLog::EndSampleInfo;
1971
1972 for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1973 {
1974 const float fitResidual =
1975 (float)samples[sampleNdx].duration.fitResponseDuration -
1976 (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1977 log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize
1978 << samples[sampleNdx].numVertices << (int)samples[sampleNdx].duration.renderReadDuration
1979 << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.firstRenderDuration
1980 << (int)samples[sampleNdx].duration.uploadDuration << (int)samples[sampleNdx].duration.secondRenderDuration
1981 << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample;
1982 }
1983
1984 log << tcu::TestLog::EndSampleList;
1985 }
1986
1987 template <typename SampleType>
analyzeSampleResults(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,bool logBucketPerformance)1988 static UploadSampleAnalyzeResult analyzeSampleResults(tcu::TestLog &log,
1989 const std::vector<UploadSampleResult<SampleType>> &samples,
1990 bool logBucketPerformance)
1991 {
1992 // Assume data is linear with some outliers, fit a line
1993 const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples);
1994 const typename SampleTypeTraits<SampleType>::StatsType resultStats =
1995 calculateSampleStatistics(theilSenFitting, samples);
1996 float approximatedTransferRate;
1997 float approximatedTransferRateNoConstant;
1998
1999 // Output raw samples
2000 {
2001 const tcu::ScopedLogSection section(log, "Samples", "Samples");
2002 logSampleList(log, theilSenFitting, samples);
2003 }
2004
2005 // Calculate results for different ranges
2006 if (logBucketPerformance)
2007 {
2008 const int numBuckets = 4;
2009 int minBufferSize = 0;
2010 int maxBufferSize = 0;
2011 std::vector<UploadSampleResult<SampleType>> buckets[numBuckets];
2012
2013 bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize);
2014
2015 for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx)
2016 {
2017 if (buckets[bucketNdx].empty())
2018 continue;
2019
2020 // Print a nice result summary
2021
2022 const int bucketRangeMin =
2023 minBufferSize + (int)(((float)bucketNdx / (float)numBuckets) * (float)(maxBufferSize - minBufferSize));
2024 const int bucketRangeMax = minBufferSize + (int)(((float)(bucketNdx + 1) / (float)numBuckets) *
2025 (float)(maxBufferSize - minBufferSize));
2026 const typename SampleTypeTraits<SampleType>::StatsType stats =
2027 calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]);
2028 const tcu::ScopedLogSection section(
2029 log, "BufferSizeRange",
2030 std::string("Transfer performance with buffer size in range [")
2031 .append(getHumanReadableByteSize(bucketRangeMin)
2032 .append(", ")
2033 .append(getHumanReadableByteSize(bucketRangeMax).append("]"))));
2034
2035 logMapRangeStats<SampleType>(log, stats);
2036 logUnmapStats<SampleType>(log, stats);
2037 logWriteStats<SampleType>(log, stats);
2038 logFlushStats<SampleType>(log, stats);
2039 logAllocStats<SampleType>(log, stats);
2040
2041 log << tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime)
2042 << tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime)
2043 << tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME,
2044 stats.result.min2DecileTime)
2045 << tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME,
2046 stats.result.max9DecileTime)
2047 << tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime)
2048 << tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE,
2049 stats.medianRate / 1024.0f / 1024.0f)
2050 << tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME,
2051 stats.maxDiffTime)
2052 << tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME,
2053 stats.maxDiff9DecileTime)
2054 << tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME,
2055 stats.medianDiffTime)
2056 << tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE,
2057 stats.maxRelDiffTime * 100.0f)
2058 << tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%",
2059 QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f)
2060 << tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%",
2061 QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f);
2062 }
2063 }
2064
2065 // Contributions
2066 if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
2067 {
2068 const tcu::ScopedLogSection section(log, "Contribution", "Contributions");
2069
2070 logMapContribution(log, samples, resultStats);
2071 logUnmapContribution(log, samples, resultStats);
2072 logWriteContribution(log, samples, resultStats);
2073 logFlushContribution(log, samples, resultStats);
2074 logAllocContribution(log, samples, resultStats);
2075 }
2076
2077 // Print results
2078 {
2079 const tcu::ScopedLogSection section(log, "Results", "Results");
2080
2081 const int medianBufferSize = (samples.front().bufferSize + samples.back().bufferSize) / 2;
2082 const float approximatedTransferTime =
2083 (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
2084 const float approximatedTransferTimeNoConstant =
2085 (theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
2086 const float sampleLinearity = calculateSampleFitLinearity(samples);
2087 const float sampleTemporalStability = calculateSampleTemporalStability(samples);
2088
2089 approximatedTransferRateNoConstant = (float)medianBufferSize / approximatedTransferTimeNoConstant;
2090 approximatedTransferRate = (float)medianBufferSize / approximatedTransferTime;
2091
2092 log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY,
2093 sampleLinearity * 100.0f)
2094 << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY,
2095 sampleTemporalStability * 100.0f)
2096 << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME,
2097 theilSenFitting.offset)
2098 << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower",
2099 "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME,
2100 theilSenFitting.offsetConfidenceLower)
2101 << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper",
2102 "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME,
2103 theilSenFitting.offsetConfidenceUpper)
2104 << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
2105 theilSenFitting.coefficient * 1024.0f * 1024.0f)
2106 << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower",
2107 "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME,
2108 theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
2109 << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper",
2110 "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME,
2111 theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
2112 << tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s",
2113 QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f)
2114 << tcu::TestLog::Float("ApproximatedTransferRateNoConstant",
2115 "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE,
2116 approximatedTransferRateNoConstant / 1024.0f / 1024.0f)
2117 << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME,
2118 resultStats.result.medianTime)
2119 << tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE,
2120 resultStats.medianRate / 1024.0f / 1024.0f);
2121 }
2122
2123 // return approximated transfer rate
2124 {
2125 UploadSampleAnalyzeResult result;
2126
2127 result.transferRateMedian = resultStats.medianRate;
2128 result.transferRateAtRange = approximatedTransferRate;
2129 result.transferRateAtInfinity = approximatedTransferRateNoConstant;
2130
2131 return result;
2132 }
2133 }
2134
2135 template <typename SampleType>
analyzeSampleResults(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples)2136 static RenderSampleAnalyzeResult analyzeSampleResults(tcu::TestLog &log,
2137 const std::vector<RenderSampleResult<SampleType>> &samples)
2138 {
2139 // Assume data is linear with some outliers, fit a line
2140 const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples);
2141 const typename SampleTypeTraits<SampleType>::StatsType resultStats =
2142 calculateSampleStatistics(theilSenFitting, samples);
2143 float approximatedProcessingRate;
2144 float approximatedProcessingRateNoConstant;
2145
2146 // output raw samples
2147 {
2148 const tcu::ScopedLogSection section(log, "Samples", "Samples");
2149 logSampleList(log, theilSenFitting, samples);
2150 }
2151
2152 // Contributions
2153 if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
2154 {
2155 const tcu::ScopedLogSection section(log, "Contribution", "Contributions");
2156
2157 logFirstRenderContribution(log, samples, resultStats);
2158 logUploadContribution(log, samples, resultStats);
2159 logRenderContribution(log, samples, resultStats);
2160 logSecondRenderContribution(log, samples, resultStats);
2161 logReadContribution(log, samples, resultStats);
2162 logTotalContribution(log, samples, resultStats);
2163 }
2164
2165 // print results
2166 {
2167 const tcu::ScopedLogSection section(log, "Results", "Results");
2168
2169 const int medianDataSize = (samples.front().renderDataSize + samples.back().renderDataSize) / 2;
2170 const float approximatedRenderTime =
2171 (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
2172 const float approximatedRenderTimeNoConstant =
2173 (theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
2174 const float sampleLinearity = calculateSampleFitLinearity(samples);
2175 const float sampleTemporalStability = calculateSampleTemporalStability(samples);
2176
2177 approximatedProcessingRateNoConstant = (float)medianDataSize / approximatedRenderTimeNoConstant;
2178 approximatedProcessingRate = (float)medianDataSize / approximatedRenderTime;
2179
2180 log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY,
2181 sampleLinearity * 100.0f)
2182 << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY,
2183 sampleTemporalStability * 100.0f)
2184 << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME,
2185 theilSenFitting.offset)
2186 << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower",
2187 "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME,
2188 theilSenFitting.offsetConfidenceLower)
2189 << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper",
2190 "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME,
2191 theilSenFitting.offsetConfidenceUpper)
2192 << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
2193 theilSenFitting.coefficient * 1024.0f * 1024.0f)
2194 << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower",
2195 "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME,
2196 theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
2197 << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper",
2198 "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME,
2199 theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
2200 << tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s",
2201 QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f)
2202 << tcu::TestLog::Float("ApproximatedProcessRateNoConstant",
2203 "Approximated processing rate without constant cost", "MB / s",
2204 QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f)
2205 << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME,
2206 resultStats.result.medianTime)
2207 << tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE,
2208 resultStats.medianRate / 1024.0f / 1024.0f);
2209 }
2210
2211 // return approximated render rate
2212 {
2213 RenderSampleAnalyzeResult result;
2214
2215 result.renderRateMedian = resultStats.medianRate;
2216 result.renderRateAtRange = approximatedProcessingRate;
2217 result.renderRateAtInfinity = approximatedProcessingRateNoConstant;
2218
2219 return result;
2220 }
2221 return RenderSampleAnalyzeResult();
2222 }
2223
generateTwoPassRandomIterationOrder(std::vector<int> & iterationOrder,int numSamples)2224 static void generateTwoPassRandomIterationOrder(std::vector<int> &iterationOrder, int numSamples)
2225 {
2226 de::Random rnd(0xabc);
2227 const int midPoint = (numSamples + 1) / 2; // !< ceil(m_numSamples / 2)
2228
2229 DE_ASSERT((int)iterationOrder.size() == numSamples);
2230
2231 // Two "passes" over range, randomize order in both passes
2232 // This allows to us detect if iterations are not independent
2233 // (first run and later run samples differ significantly?)
2234
2235 for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx)
2236 iterationOrder[sampleNdx] = sampleNdx * 2;
2237 for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx)
2238 iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1;
2239
2240 for (int ndx = 0; ndx < midPoint; ++ndx)
2241 std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]);
2242 for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx)
2243 std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size() - 1)]);
2244 }
2245
// Common base for the buffer performance cases.
//
// The constructor chooses a buffer size for every sample and a shuffled sample order.
// iterate() then runs one sample per call through runSample(), interleaved with GL warmup
// and periodic random buffer operations, and finally hands all results to
// logAndSetTestResult().
template <typename SampleType>
class BasicBufferCase : public TestCase
{
public:
    enum Flags
    {
        // Allocate each sample's buffer 1.5x its tested size (see constructor)
        FLAG_ALLOCATE_LARGER_BUFFER = 0x01,
    };
    BasicBufferCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax,
                    int numSamples, int flags);
    ~BasicBufferCase(void);

    virtual void init(void);
    virtual void deinit(void);

protected:
    IterateResult iterate(void);

    // Run one sample. Returning false makes iterate() retry the same sample on its next call.
    virtual bool runSample(int iteration, UploadSampleResult<SampleType> &sample) = 0;
    // Called once from iterate() after the last sample has completed.
    virtual void logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results) = 0;

    // Clears m_useGL: init() then creates no shader program and iterate() skips the warmup
    // and the periodic random buffer operations.
    void disableGLWarmup(void);
    // Reads back a small pixel area with glu::readPixels.
    void waitGLResults(void);

    enum
    {
        // Edge length of the viewport / readback area used by the helper draws
        UNUSED_RENDER_AREA_SIZE = 32
    };

    glu::ShaderProgram *m_minimalProgram; // !< created in init(), deleted in deinit()
    int32_t m_minimalProgramPosLoc;       // !< location of "a_position" in m_minimalProgram
    uint32_t m_bufferID;                  // !< deleted in deinit() if non-zero

    const int m_numSamples;
    const int m_bufferSizeMin;
    const int m_bufferSizeMax;
    const bool m_allocateLargerBuffer; // !< set from FLAG_ALLOCATE_LARGER_BUFFER

private:
    int m_iteration;                                       // !< number of samples completed so far
    std::vector<int> m_iterationOrder;                     // !< maps iteration number to sample index
    std::vector<UploadSampleResult<SampleType>> m_results; // !< one entry per sample

    bool m_useGL;                // !< true until disableGLWarmup() is called
    int m_bufferRandomizerTimer; // !< incremented by iterate() when the randomizer condition is evaluated
};
2292
2293 template <typename SampleType>
BasicBufferCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,int numSamples,int flags)2294 BasicBufferCase<SampleType>::BasicBufferCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
2295 int bufferSizeMax, int numSamples, int flags)
2296 : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, desc)
2297 , m_minimalProgram(DE_NULL)
2298 , m_minimalProgramPosLoc(-1)
2299 , m_bufferID(0)
2300 , m_numSamples(numSamples)
2301 , m_bufferSizeMin(bufferSizeMin)
2302 , m_bufferSizeMax(bufferSizeMax)
2303 , m_allocateLargerBuffer((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0)
2304 , m_iteration(0)
2305 , m_iterationOrder(numSamples)
2306 , m_results(numSamples)
2307 , m_useGL(true)
2308 , m_bufferRandomizerTimer(0)
2309 {
2310 // "randomize" iteration order. Deterministic, patternless
2311 generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);
2312
2313 // choose buffer sizes
2314 for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx)
2315 {
2316 const int rawBufferSize =
2317 (int)deFloatFloor((float)bufferSizeMin +
2318 (float)(bufferSizeMax - bufferSizeMin) * ((float)(sampleNdx + 1) / (float)m_numSamples));
2319 const int bufferSize = deAlign32(rawBufferSize, 16);
2320 const int allocatedBufferSize =
2321 deAlign32((m_allocateLargerBuffer) ? ((int)((float)bufferSize * 1.5f)) : (bufferSize), 16);
2322
2323 m_results[sampleNdx].bufferSize = bufferSize;
2324 m_results[sampleNdx].allocatedSize = allocatedBufferSize;
2325 m_results[sampleNdx].writtenSize = -1;
2326 }
2327 }
2328
2329 template <typename SampleType>
~BasicBufferCase(void)2330 BasicBufferCase<SampleType>::~BasicBufferCase(void)
2331 {
2332 deinit();
2333 }
2334
2335 template <typename SampleType>
init(void)2336 void BasicBufferCase<SampleType>::init(void)
2337 {
2338 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2339
2340 if (!m_useGL)
2341 return;
2342
2343 // \note Viewport size is not checked, it won't matter if the render target actually is smaller than UNUSED_RENDER_AREA_SIZE
2344
2345 // minimal shader
2346
2347 m_minimalProgram = new glu::ShaderProgram(m_context.getRenderContext(),
2348 glu::ProgramSources() << glu::VertexSource(s_minimalVertexShader)
2349 << glu::FragmentSource(s_minimalFragnentShader));
2350 if (!m_minimalProgram->isOk())
2351 {
2352 m_testCtx.getLog() << *m_minimalProgram;
2353 throw tcu::TestError("failed to build shader program");
2354 }
2355
2356 m_minimalProgramPosLoc = gl.getAttribLocation(m_minimalProgram->getProgram(), "a_position");
2357 if (m_minimalProgramPosLoc == -1)
2358 throw tcu::TestError("a_position location was -1");
2359 }
2360
2361 template <typename SampleType>
deinit(void)2362 void BasicBufferCase<SampleType>::deinit(void)
2363 {
2364 if (m_bufferID)
2365 {
2366 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2367 m_bufferID = 0;
2368 }
2369
2370 delete m_minimalProgram;
2371 m_minimalProgram = DE_NULL;
2372 }
2373
// Execute one step of the test. Each call does exactly one of the following:
//  1) a one-time warmup that creates, optionally draws from, and deletes random-sized buffers,
//  2) a smaller batch of random buffer uploads and mappings, or
//  3) one sample via runSample().
// Steps 1 and 2 are skipped when m_useGL is false.
//
// \return STOP after the last sample has been run and the results logged, CONTINUE otherwise.
template <typename SampleType>
TestCase::IterateResult BasicBufferCase<SampleType>::iterate(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    // \note Function-local static: the flag is shared by every case object of this template
    //       instantiation, not stored per test case.
    static bool buffersWarmedUp = false;

    static const uint32_t usages[] = {
        GL_STREAM_DRAW, GL_STREAM_READ,  GL_STREAM_COPY,  GL_STATIC_DRAW,  GL_STATIC_READ,
        GL_STATIC_COPY, GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
    };

    // Allocate some random sized buffers and remove them to
    // make sure the first samples too have some buffers removed
    // just before their allocation. This is only needed by the
    // the first test.

    if (m_useGL && !buffersWarmedUp)
    {
        const int numRandomBuffers = 6;
        const int numRepeats       = 10;
        const int maxBufferSize    = 16777216;
        const std::vector<uint8_t> zeroData(maxBufferSize, 0x00);
        de::Random rnd(0x1234);
        uint32_t bufferIDs[numRandomBuffers] = {0};

        gl.useProgram(m_minimalProgram->getProgram());
        gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
        gl.enableVertexAttribArray(m_minimalProgramPosLoc);

        for (int ndx = 0; ndx < numRepeats; ++ndx)
        {
            // Create buffer and maybe draw from it
            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
            {
                // Size is aligned to 16 bytes, i.e. a whole number of float[4] vertices
                const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
                const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];

                gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
                gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
                gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);

                if (rnd.getBool())
                {
                    // Draw the first and the last vertex of the buffer
                    gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
                    gl.drawArrays(GL_POINTS, 0, 1);
                    gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
                }
            }

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
                gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);

            waitGLResults();
            GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");

            // Warmup may take a while; keep the watchdog from firing
            m_testCtx.touchWatchdog();
        }

        buffersWarmedUp = true;
        return CONTINUE;
    }
    // \note The counter is only evaluated (and incremented) when m_useGL is true and the
    //       warmup branch above was not taken.
    else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
    {
        // Do some random buffer operations to every now and then
        // to make sure the previous test iterations won't affect
        // following test runs.

        const int numRandomBuffers = 3;
        const int maxBufferSize    = 16777216;
        const std::vector<uint8_t> zeroData(maxBufferSize, 0x00);
        // Seed varies with the (already incremented) timer so each batch differs
        de::Random rnd(0x1234 + 0xabc * m_bufferRandomizerTimer);

        // BufferData
        {
            uint32_t bufferIDs[numRandomBuffers] = {0};

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
            {
                const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
                const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];

                gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
                gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
                gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
            }

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
                gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");

        // Do some memory mappings
        {
            uint32_t bufferIDs[numRandomBuffers] = {0};

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
            {
                const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
                const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];
                void *ptr;

                gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
                gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
                gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);

                // NOTE(review): unlike the warmup branch, this branch does not call useProgram /
                // enableVertexAttribArray itself; the draws appear to rely on state set by an
                // earlier iteration or sample -- confirm this is intended.
                gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
                gl.drawArrays(GL_POINTS, 0, 1);
                gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);

                if (rnd.getBool())
                    waitGLResults();

                ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
                if (ptr)
                {
                    // Write through the mapping; the returned time is not used here
                    medianTimeMemcpy(ptr, &zeroData[0], randomSize);
                    gl.unmapBuffer(GL_ARRAY_BUFFER);
                }
            }

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
                gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);

            waitGLResults();
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
        return CONTINUE;
    }
    else
    {
        // Run the next sample; m_iterationOrder maps the iteration number to a sample index
        const int currentIteration     = m_iteration;
        const int sampleNdx            = m_iterationOrder[currentIteration];
        const bool sampleRunSuccessful = runSample(currentIteration, m_results[sampleNdx]);

        GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()");

        // Retry failed samples
        // (m_iteration is not advanced, so the same sample is picked again)
        if (!sampleRunSuccessful)
            return CONTINUE;

        if (++m_iteration >= m_numSamples)
        {
            logAndSetTestResult(m_results);
            return STOP;
        }
        else
            return CONTINUE;
    }
}
2525
2526 template <typename SampleType>
disableGLWarmup(void)2527 void BasicBufferCase<SampleType>::disableGLWarmup(void)
2528 {
2529 m_useGL = false;
2530 }
2531
2532 template <typename SampleType>
waitGLResults(void)2533 void BasicBufferCase<SampleType>::waitGLResults(void)
2534 {
2535 tcu::Surface unusedSurface(UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
2536 glu::readPixels(m_context.getRenderContext(), 0, 0, unusedSurface.getAccess());
2537 }
2538
2539 template <typename SampleType>
2540 class BasicUploadCase : public BasicBufferCase<SampleType>
2541 {
2542 public:
2543 enum CaseType
2544 {
2545 CASE_NO_BUFFERS = 0,
2546 CASE_NEW_BUFFER,
2547 CASE_UNSPECIFIED_BUFFER,
2548 CASE_SPECIFIED_BUFFER,
2549 CASE_USED_BUFFER,
2550 CASE_USED_LARGER_BUFFER,
2551
2552 CASE_LAST
2553 };
2554
2555 enum CaseFlags
2556 {
2557 FLAG_DONT_LOG_BUFFER_INFO = 0x01,
2558 FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT = 0x02,
2559 };
2560
2561 enum ResultType
2562 {
2563 RESULT_MEDIAN_TRANSFER_RATE = 0,
2564 RESULT_ASYMPTOTIC_TRANSFER_RATE,
2565 };
2566
2567 BasicUploadCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax,
2568 int numSamples, uint32_t bufferUsage, CaseType caseType, ResultType resultType, int flags = 0);
2569
2570 ~BasicUploadCase(void);
2571
2572 virtual void init(void);
2573 virtual void deinit(void);
2574
2575 private:
2576 bool runSample(int iteration, UploadSampleResult<SampleType> &sample);
2577 void createBuffer(int bufferSize, int iteration);
2578 void deleteBuffer(int bufferSize);
2579 void useBuffer(int bufferSize);
2580
2581 virtual void testBufferUpload(UploadSampleResult<SampleType> &result, int writeSize) = 0;
2582 void logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results);
2583
2584 uint32_t m_unusedBufferID;
2585
2586 protected:
2587 const CaseType m_caseType;
2588 const ResultType m_resultType;
2589 const uint32_t m_bufferUsage;
2590 const bool m_logBufferInfo;
2591 const bool m_bufferUnspecifiedContent;
2592 std::vector<uint8_t> m_zeroData;
2593
2594 using BasicBufferCase<SampleType>::m_testCtx;
2595 using BasicBufferCase<SampleType>::m_context;
2596
2597 using BasicBufferCase<SampleType>::UNUSED_RENDER_AREA_SIZE;
2598 using BasicBufferCase<SampleType>::m_minimalProgram;
2599 using BasicBufferCase<SampleType>::m_minimalProgramPosLoc;
2600 using BasicBufferCase<SampleType>::m_bufferID;
2601 using BasicBufferCase<SampleType>::m_numSamples;
2602 using BasicBufferCase<SampleType>::m_bufferSizeMin;
2603 using BasicBufferCase<SampleType>::m_bufferSizeMax;
2604 using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
2605 };
2606
2607 template <typename SampleType>
BasicUploadCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,int numSamples,uint32_t bufferUsage,CaseType caseType,ResultType resultType,int flags)2608 BasicUploadCase<SampleType>::BasicUploadCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
2609 int bufferSizeMax, int numSamples, uint32_t bufferUsage, CaseType caseType,
2610 ResultType resultType, int flags)
2611 : BasicBufferCase<SampleType>(
2612 context, name, desc, bufferSizeMin, bufferSizeMax, numSamples,
2613 (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER) : (0))
2614 , m_unusedBufferID(0)
2615 , m_caseType(caseType)
2616 , m_resultType(resultType)
2617 , m_bufferUsage(bufferUsage)
2618 , m_logBufferInfo((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0)
2619 , m_bufferUnspecifiedContent((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0)
2620 , m_zeroData()
2621 {
2622 DE_ASSERT(m_caseType < CASE_LAST);
2623 }
2624
2625 template <typename SampleType>
~BasicUploadCase(void)2626 BasicUploadCase<SampleType>::~BasicUploadCase(void)
2627 {
2628 deinit();
2629 }
2630
2631 template <typename SampleType>
init(void)2632 void BasicUploadCase<SampleType>::init(void)
2633 {
2634 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2635
2636 BasicBufferCase<SampleType>::init();
2637
2638 // zero buffer as upload source
2639 m_zeroData.resize(m_bufferSizeMax, 0x00);
2640
2641 // unused buffer
2642
2643 gl.genBuffers(1, &m_unusedBufferID);
2644 GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf");
2645
2646 // log basic info
2647
2648 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance with " << m_numSamples
2649 << " test samples. Sample order is randomized. All samples at even positions (first = 0) are "
2650 "tested before samples at odd positions.\n"
2651 << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", "
2652 << getHumanReadableByteSize(m_bufferSizeMax) << "]." << tcu::TestLog::EndMessage;
2653
2654 if (m_logBufferInfo)
2655 {
2656 switch (m_caseType)
2657 {
2658 case CASE_NO_BUFFERS:
2659 break;
2660
2661 case CASE_NEW_BUFFER:
2662 m_testCtx.getLog() << tcu::TestLog::Message
2663 << "Target buffer is generated but not specified (i.e glBufferData() not called)."
2664 << tcu::TestLog::EndMessage;
2665 break;
2666
2667 case CASE_UNSPECIFIED_BUFFER:
2668 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)."
2669 << tcu::TestLog::EndMessage;
2670 break;
2671
2672 case CASE_SPECIFIED_BUFFER:
2673 m_testCtx.getLog() << tcu::TestLog::Message
2674 << "Target buffer contents are specified prior testing with glBufferData(data)."
2675 << tcu::TestLog::EndMessage;
2676 break;
2677
2678 case CASE_USED_BUFFER:
2679 m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing."
2680 << tcu::TestLog::EndMessage;
2681 break;
2682
2683 case CASE_USED_LARGER_BUFFER:
2684 m_testCtx.getLog() << tcu::TestLog::Message
2685 << "Target buffer is larger and has been used in drawing before testing."
2686 << tcu::TestLog::EndMessage;
2687 break;
2688
2689 default:
2690 DE_ASSERT(false);
2691 break;
2692 }
2693 }
2694
2695 if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE)
2696 m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples."
2697 << tcu::TestLog::EndMessage;
2698 else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE)
2699 m_testCtx.getLog() << tcu::TestLog::Message
2700 << "Test result is the asymptotic transfer rate as the buffer size approaches infinity."
2701 << tcu::TestLog::EndMessage;
2702 else
2703 DE_ASSERT(false);
2704 }
2705
2706 template <typename SampleType>
deinit(void)2707 void BasicUploadCase<SampleType>::deinit(void)
2708 {
2709 if (m_unusedBufferID)
2710 {
2711 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_unusedBufferID);
2712 m_unusedBufferID = 0;
2713 }
2714
2715 m_zeroData = std::vector<uint8_t>();
2716
2717 BasicBufferCase<SampleType>::deinit();
2718 }
2719
2720 template <typename SampleType>
runSample(int iteration,UploadSampleResult<SampleType> & sample)2721 bool BasicUploadCase<SampleType>::runSample(int iteration, UploadSampleResult<SampleType> &sample)
2722 {
2723 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2724 const int allocatedBufferSize = sample.allocatedSize;
2725 const int bufferSize = sample.bufferSize;
2726
2727 if (m_caseType != CASE_NO_BUFFERS)
2728 createBuffer(iteration, allocatedBufferSize);
2729
2730 // warmup CPU before the test to make sure the power management governor
2731 // keeps us in the "high performance" mode
2732 {
2733 deYield();
2734 tcu::warmupCPU();
2735 deYield();
2736 }
2737
2738 testBufferUpload(sample, bufferSize);
2739 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
2740
2741 if (m_caseType != CASE_NO_BUFFERS)
2742 deleteBuffer(bufferSize);
2743
2744 return true;
2745 }
2746
2747 template <typename SampleType>
createBuffer(int iteration,int bufferSize)2748 void BasicUploadCase<SampleType>::createBuffer(int iteration, int bufferSize)
2749 {
2750 DE_ASSERT(!m_bufferID);
2751 DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2752
2753 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2754
2755 // create buffer
2756
2757 if (m_caseType == CASE_NO_BUFFERS)
2758 return;
2759
2760 // create empty buffer
2761
2762 gl.genBuffers(1, &m_bufferID);
2763 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2764 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2765
2766 if (m_caseType == CASE_NEW_BUFFER)
2767 {
2768 // upload something else first, this should reduce noise in samples
2769
2770 de::Random rng(0xbadc * iteration);
2771 const int sizeDelta = rng.getInt(0, 2097140);
2772 const int unusedUploadSize =
2773 deAlign32(1048576 + sizeDelta, 4 * 4); // Vary buffer size to make sure it is always reallocated
2774 const std::vector<uint8_t> unusedData(unusedUploadSize, 0x20);
2775
2776 gl.bindBuffer(GL_ARRAY_BUFFER, m_unusedBufferID);
2777 gl.bufferData(GL_ARRAY_BUFFER, unusedUploadSize, &unusedData[0], m_bufferUsage);
2778
2779 // make sure upload won't interfere with the test
2780 useBuffer(unusedUploadSize);
2781
2782 // don't kill the buffer so that the following upload cannot potentially reuse the buffer
2783
2784 return;
2785 }
2786
2787 // specify it
2788
2789 if (m_caseType == CASE_UNSPECIFIED_BUFFER)
2790 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2791 else
2792 {
2793 const std::vector<uint8_t> unusedData(bufferSize, 0x20);
2794 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &unusedData[0], m_bufferUsage);
2795 }
2796
2797 if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER)
2798 return;
2799
2800 // use it and make sure it is uploaded
2801
2802 useBuffer(bufferSize);
2803 DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
2804 }
2805
2806 template <typename SampleType>
deleteBuffer(int bufferSize)2807 void BasicUploadCase<SampleType>::deleteBuffer(int bufferSize)
2808 {
2809 DE_ASSERT(m_bufferID);
2810 DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2811
2812 // render from the buffer to make sure it actually made it to the gpu. This is to
2813 // make sure that if the upload actually happens later or is happening right now in
2814 // the background, it will not interfere with further test runs
2815
2816 // if buffer contains unspecified content, sourcing data from it results in undefined
2817 // results, possibly including program termination. Specify all data to prevent such
2818 // case from happening
2819
2820 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2821
2822 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2823
2824 if (m_bufferUnspecifiedContent)
2825 {
2826 const std::vector<uint8_t> unusedData(bufferSize, 0x20);
2827 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &unusedData[0], m_bufferUsage);
2828
2829 GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
2830 }
2831
2832 useBuffer(bufferSize);
2833
2834 gl.deleteBuffers(1, &m_bufferID);
2835 m_bufferID = 0;
2836 }
2837
2838 template <typename SampleType>
useBuffer(int bufferSize)2839 void BasicUploadCase<SampleType>::useBuffer(int bufferSize)
2840 {
2841 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2842
2843 gl.useProgram(m_minimalProgram->getProgram());
2844
2845 gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
2846 gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2847 gl.enableVertexAttribArray(m_minimalProgramPosLoc);
2848
2849 // use whole buffer to make sure buffer is uploaded by drawing first and last
2850 DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0);
2851 gl.drawArrays(GL_POINTS, 0, 1);
2852 gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1);
2853
2854 BasicBufferCase<SampleType>::waitGLResults();
2855 }
2856
2857 template <typename SampleType>
logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> & results)2858 void BasicUploadCase<SampleType>::logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results)
2859 {
2860 const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, true);
2861
2862 // with small buffers, report the median transfer rate of the samples
2863 // with large buffers, report the expected preformance of infinitely large buffers
2864 const float rate = (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ? (analysis.transferRateAtInfinity) :
2865 (analysis.transferRateMedian);
2866
2867 if (rate == std::numeric_limits<float>::infinity())
2868 {
2869 // sample times are 1) invalid or 2) timer resolution too low
2870 // report speed 0 bytes / s since real value cannot be determined
2871 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
2872 }
2873 else
2874 {
2875 // report transfer rate in MB / s
2876 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
2877 }
2878 }
2879
// Reference case: times a memcpy() of the zero-filled source data into a host-side
// destination buffer instead of a GL upload. No target buffer object is used
// (CASE_NO_BUFFERS) and GL warmup is disabled in the constructor.
class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration>
{
public:
    // largeBuffersCase selects the asymptotic transfer rate as the result; otherwise the
    // median transfer rate is reported.
    ReferenceMemcpyCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                        int numSamples, bool largeBuffersCase);
    ~ReferenceMemcpyCase(void);

    void init(void);
    void deinit(void);

private:
    void testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);

    std::vector<uint8_t> m_dstBuf; // !< memcpy destination, sized to m_bufferSizeMax in init()
};
2895
ReferenceMemcpyCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,bool largeBuffersCase)2896 ReferenceMemcpyCase::ReferenceMemcpyCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
2897 int maxBufferSize, int numSamples, bool largeBuffersCase)
2898 : BasicUploadCase<SingleOperationDuration>(
2899 ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS,
2900 (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE))
2901 , m_dstBuf()
2902 {
2903 disableGLWarmup();
2904 }
2905
// Nothing to release here; the base class destructors run their own deinit().
ReferenceMemcpyCase::~ReferenceMemcpyCase(void)
{
}
2909
init(void)2910 void ReferenceMemcpyCase::init(void)
2911 {
2912 // Describe what the test tries to do
2913 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage;
2914
2915 m_dstBuf.resize(m_bufferSizeMax, 0x00);
2916
2917 BasicUploadCase<SingleOperationDuration>::init();
2918 }
2919
deinit(void)2920 void ReferenceMemcpyCase::deinit(void)
2921 {
2922 m_dstBuf = std::vector<uint8_t>();
2923 BasicUploadCase<SingleOperationDuration>::deinit();
2924 }
2925
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2926 void ReferenceMemcpyCase::testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize)
2927 {
2928 // write
2929 result.duration.totalDuration = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize);
2930 result.duration.fitResponseDuration = result.duration.totalDuration;
2931
2932 result.writtenSize = bufferSize;
2933 }
2934
// Upload case that times a single glBufferData() call per sample. The test result is the
// median transfer rate.
class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration>
{
public:
    BufferDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                         int numSamples, uint32_t bufferUsage, CaseType caseType);
    ~BufferDataUploadCase(void);

    void init(void);

private:
    // Binds the target buffer and times glBufferData() of bufferSize bytes
    void testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);
};
2947
// Forward everything to the base class; this case always reports the median transfer rate
// and passes no case flags.
BufferDataUploadCase::BufferDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                           int maxBufferSize, int numSamples, uint32_t bufferUsage, CaseType caseType)
    : BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
                                               caseType, RESULT_MEDIAN_TRANSFER_RATE)
{
}
2954
// Nothing to release here; the base class destructors run their own deinit().
BufferDataUploadCase::~BufferDataUploadCase(void)
{
}
2958
init(void)2959 void BufferDataUploadCase::init(void)
2960 {
2961 // Describe what the test tries to do
2962 m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage;
2963
2964 BasicUploadCase<SingleOperationDuration>::init();
2965 }
2966
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2967 void BufferDataUploadCase::testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize)
2968 {
2969 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2970
2971 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2972
2973 // upload
2974 {
2975 uint64_t startTime;
2976 uint64_t endTime;
2977
2978 startTime = deGetMicroseconds();
2979 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2980 endTime = deGetMicroseconds();
2981
2982 result.duration.totalDuration = endTime - startTime;
2983 result.duration.fitResponseDuration = result.duration.totalDuration;
2984 result.writtenSize = bufferSize;
2985 }
2986 }
2987
// Upload case that times glBufferSubData(). The test result is the median transfer rate.
class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration>
{
public:
    // Exactly one of FLAG_FULL_UPLOAD and FLAG_PARTIAL_UPLOAD must be given (asserted in the
    // constructor).
    enum Flags
    {
        FLAG_FULL_UPLOAD           = 0x01, // !< the whole buffer is updated
        FLAG_PARTIAL_UPLOAD        = 0x02, // !< half of the buffer data is updated
        FLAG_INVALIDATE_BEFORE_USE = 0x04, // !< glBufferData(..., NULL) is called before the upload
    };

    BufferSubDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                            int numSamples, uint32_t bufferUsage, CaseType parentCase, int flags);
    ~BufferSubDataUploadCase(void);

    void init(void);

private:
    void testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);

    const bool m_fullUpload;          // !< true when FLAG_FULL_UPLOAD is set
    const bool m_invalidateBeforeUse; // !< true when FLAG_INVALIDATE_BEFORE_USE is set
};
3010
// Store the upload mode flags. The case always reports the median transfer rate.
//
// \param parentCase State of the target buffer before the timed upload.
// \param flags      Bitmask of Flags values.
BufferSubDataUploadCase::BufferSubDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                                 int maxBufferSize, int numSamples, uint32_t bufferUsage,
                                                 CaseType parentCase, int flags)
    : BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
                                               parentCase, RESULT_MEDIAN_TRANSFER_RATE)
    , m_fullUpload((flags & FLAG_FULL_UPLOAD) != 0)
    , m_invalidateBeforeUse((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
{
    // At least one upload mode must be selected...
    DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
    // ...but not both at the same time
    DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
}
3022
~BufferSubDataUploadCase(void)3023 BufferSubDataUploadCase::~BufferSubDataUploadCase(void)
3024 {
3025 }
3026
init(void)3027 void BufferSubDataUploadCase::init(void)
3028 {
3029 // Describe what the test tries to do
3030 m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferSubData() function call performance. "
3031 << ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") :
3032 ("Half of the buffer data is updated with glBufferSubData. "))
3033 << ((m_invalidateBeforeUse) ?
3034 ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") :
3035 (""))
3036 << "\n"
3037 << tcu::TestLog::EndMessage;
3038
3039 BasicUploadCase<SingleOperationDuration>::init();
3040 }
3041
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3042 void BufferSubDataUploadCase::testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize)
3043 {
3044 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3045
3046 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3047
3048 // "invalidate", upload null
3049 if (m_invalidateBeforeUse)
3050 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3051
3052 // upload
3053 {
3054 uint64_t startTime;
3055 uint64_t endTime;
3056
3057 startTime = deGetMicroseconds();
3058
3059 if (m_fullUpload)
3060 gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);
3061 else
3062 {
3063 // upload to buffer center
3064 gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
3065 }
3066
3067 endTime = deGetMicroseconds();
3068
3069 result.duration.totalDuration = endTime - startTime;
3070 result.duration.fitResponseDuration = result.duration.totalDuration;
3071
3072 if (m_fullUpload)
3073 result.writtenSize = bufferSize;
3074 else
3075 result.writtenSize = bufferSize / 2;
3076 }
3077 }
3078
// Upload performance case that measures a glMapBufferRange() / write /
// glUnmapBuffer() sequence.
//
// The case flags select how much of the buffer is mapped and in which state
// the buffer is when it gets mapped; mapFlags are the access bits passed to
// glMapBufferRange().
class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration>
{
public:
    enum Flags
    {
        FLAG_PARTIAL = 0x01,                       // map only half of the buffer
        FLAG_MANUAL_INVALIDATION = 0x02,           // glBufferData(..., NULL) before mapping
        FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04, // map a fresh buffer allocated without contents
        FLAG_USE_UNUSED_SPECIFIED_BUFFER = 0x08,   // map a fresh buffer allocated with contents
    };

    MapBufferRangeCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                       int numSamples, uint32_t bufferUsage, uint32_t mapFlags, int caseFlags);
    ~MapBufferRangeCase(void);

    // Logs the case description and then runs the base class init().
    void init(void);

private:
    // Helpers that translate the case/map flags into base class constructor arguments.
    static CaseType getBaseCaseType(int caseFlags);
    static int getBaseFlags(uint32_t mapFlags, int caseFlags);

    // Runs attemptBufferMap(), retrying when unmapping fails.
    void testBufferUpload(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize);
    // One timed allocate / map / write / unmap pass; throws UnmapFailureError if unmapping fails.
    void attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize);

    const bool m_manualInvalidation;         // FLAG_MANUAL_INVALIDATION given
    const bool m_fullUpload;                 // FLAG_PARTIAL not given
    const bool m_useUnusedUnspecifiedBuffer; // FLAG_USE_UNUSED_UNSPECIFIED_BUFFER given
    const bool m_useUnusedSpecifiedBuffer;   // FLAG_USE_UNUSED_SPECIFIED_BUFFER given
    const uint32_t m_mapFlags;               // access bits for glMapBufferRange()
    int m_unmapFailures;                     // unmap failures counted by testBufferUpload()
};
3110
MapBufferRangeCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,uint32_t bufferUsage,uint32_t mapFlags,int caseFlags)3111 MapBufferRangeCase::MapBufferRangeCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
3112 int maxBufferSize, int numSamples, uint32_t bufferUsage, uint32_t mapFlags,
3113 int caseFlags)
3114 : BasicUploadCase<MapBufferRangeDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
3115 getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE,
3116 getBaseFlags(mapFlags, caseFlags))
3117 , m_manualInvalidation((caseFlags & FLAG_MANUAL_INVALIDATION) != 0)
3118 , m_fullUpload((caseFlags & FLAG_PARTIAL) == 0)
3119 , m_useUnusedUnspecifiedBuffer((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
3120 , m_useUnusedSpecifiedBuffer((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
3121 , m_mapFlags(mapFlags)
3122 , m_unmapFailures(0)
3123 {
3124 DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer));
3125 DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation));
3126 }
3127
~MapBufferRangeCase(void)3128 MapBufferRangeCase::~MapBufferRangeCase(void)
3129 {
3130 }
3131
init(void)3132 void MapBufferRangeCase::init(void)
3133 {
3134 // Describe what the test tries to do
3135 m_testCtx.getLog()
3136 << tcu::TestLog::Message << "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
3137 << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
3138 << ((m_useUnusedUnspecifiedBuffer) ?
3139 ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") :
3140 (""))
3141 << ((m_useUnusedSpecifiedBuffer) ?
3142 ("The buffer has not been used before mapping and is allocated with specified contents.\n") :
3143 (""))
3144 << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ?
3145 ("The buffer has previously been used in a drawing operation.\n") :
3146 (""))
3147 << ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : (""))
3148 << "Map bits:\n"
3149 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3150 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3151 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3152 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3153 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3154 << tcu::TestLog::EndMessage;
3155
3156 BasicUploadCase<MapBufferRangeDuration>::init();
3157 }
3158
getBaseCaseType(int caseFlags)3159 MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType(int caseFlags)
3160 {
3161 if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
3162 return CASE_USED_BUFFER;
3163 else
3164 return CASE_NEW_BUFFER;
3165 }
3166
getBaseFlags(uint32_t mapFlags,int caseFlags)3167 int MapBufferRangeCase::getBaseFlags(uint32_t mapFlags, int caseFlags)
3168 {
3169 int flags = FLAG_DONT_LOG_BUFFER_INFO;
3170
3171 // If buffer contains unspecified data when it is sourced (i.e drawn)
3172 // results are undefined, and system errors may occur. Signal parent
3173 // class to take this into account
3174 if (caseFlags & FLAG_PARTIAL)
3175 {
3176 if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || (caseFlags & FLAG_MANUAL_INVALIDATION) != 0 ||
3177 (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
3178 {
3179 flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
3180 }
3181 }
3182
3183 return flags;
3184 }
3185
testBufferUpload(UploadSampleResult<MapBufferRangeDuration> & result,int bufferSize)3186 void MapBufferRangeCase::testBufferUpload(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize)
3187 {
3188 const int unmapFailureThreshold = 4;
3189
3190 for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
3191 {
3192 try
3193 {
3194 attemptBufferMap(result, bufferSize);
3195 return;
3196 }
3197 catch (UnmapFailureError &)
3198 {
3199 }
3200 }
3201
3202 throw tcu::TestError("Unmapping failures exceeded limit");
3203 }
3204
attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> & result,int bufferSize)3205 void MapBufferRangeCase::attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize)
3206 {
3207 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3208
3209 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3210
3211 if (m_fullUpload)
3212 result.writtenSize = bufferSize;
3213 else
3214 result.writtenSize = bufferSize / 2;
3215
3216 // Create unused buffer
3217
3218 if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer)
3219 {
3220 uint64_t startTime;
3221 uint64_t endTime;
3222
3223 // "invalidate" or allocate, upload null
3224 startTime = deGetMicroseconds();
3225 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3226 endTime = deGetMicroseconds();
3227
3228 result.duration.allocDuration = endTime - startTime;
3229 }
3230 else if (m_useUnusedSpecifiedBuffer)
3231 {
3232 uint64_t startTime;
3233 uint64_t endTime;
3234
3235 // Specify buffer contents
3236 startTime = deGetMicroseconds();
3237 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3238 endTime = deGetMicroseconds();
3239
3240 result.duration.allocDuration = endTime - startTime;
3241 }
3242 else
3243 {
3244 // No alloc, no time
3245 result.duration.allocDuration = 0;
3246 }
3247
3248 // upload
3249 {
3250 void *mapPtr;
3251
3252 // Map
3253 {
3254 uint64_t startTime;
3255 uint64_t endTime;
3256
3257 startTime = deGetMicroseconds();
3258 if (m_fullUpload)
3259 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags);
3260 else
3261 {
3262 // upload to buffer center
3263 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags);
3264 }
3265 endTime = deGetMicroseconds();
3266
3267 if (!mapPtr)
3268 throw tcu::Exception("MapBufferRange returned NULL");
3269
3270 result.duration.mapDuration = endTime - startTime;
3271 }
3272
3273 // Write
3274 {
3275 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
3276 }
3277
3278 // Unmap
3279 {
3280 uint64_t startTime;
3281 uint64_t endTime;
3282 glw::GLboolean unmapSuccessful;
3283
3284 startTime = deGetMicroseconds();
3285 unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
3286 endTime = deGetMicroseconds();
3287
3288 // if unmapping fails, just try again later
3289 if (!unmapSuccessful)
3290 throw UnmapFailureError();
3291
3292 result.duration.unmapDuration = endTime - startTime;
3293 }
3294
3295 result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
3296 result.duration.unmapDuration + result.duration.allocDuration;
3297 result.duration.fitResponseDuration = result.duration.totalDuration;
3298 }
3299 }
3300
// Upload performance case that measures a glMapBufferRange() / write /
// glFlushMappedBufferRange() / glUnmapBuffer() sequence.
//
// The case flags select how much of the buffer is mapped, how the mapped
// range is flushed and in which state the buffer is when it gets mapped;
// mapFlags are the access bits passed to glMapBufferRange().
class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration>
{
public:
    enum Flags
    {
        FLAG_PARTIAL = 0x01,                       // map only half of the buffer
        FLAG_FLUSH_IN_PARTS = 0x02,                // flush the mapped range with several flush calls
        FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04, // map a fresh buffer allocated without contents
        FLAG_USE_UNUSED_SPECIFIED_BUFFER = 0x08,   // map a fresh buffer allocated with contents
        FLAG_FLUSH_PARTIAL = 0x10,                 // write and flush only half of the buffer range
    };

    MapBufferRangeFlushCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                            int numSamples, uint32_t bufferUsage, uint32_t mapFlags, int caseFlags);
    ~MapBufferRangeFlushCase(void);

    // Logs the case description and then runs the base class init().
    void init(void);

private:
    // Helpers that translate the case/map flags into base class constructor arguments.
    static CaseType getBaseCaseType(int caseFlags);
    static int getBaseFlags(uint32_t mapFlags, int caseFlags);

    // Runs attemptBufferMap(), retrying when unmapping fails.
    void testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize);
    // One timed allocate / map / write / flush / unmap pass; throws UnmapFailureError if unmapping fails.
    void attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize);

    const bool m_fullUpload;                 // FLAG_PARTIAL not given
    const bool m_flushInParts;               // FLAG_FLUSH_IN_PARTS given
    const bool m_flushPartial;               // FLAG_FLUSH_PARTIAL given
    const bool m_useUnusedUnspecifiedBuffer; // FLAG_USE_UNUSED_UNSPECIFIED_BUFFER given
    const bool m_useUnusedSpecifiedBuffer;   // FLAG_USE_UNUSED_SPECIFIED_BUFFER given
    const uint32_t m_mapFlags;               // access bits for glMapBufferRange()
    int m_unmapFailures;                     // unmap failures counted by testBufferUpload()
};
3334
MapBufferRangeFlushCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,uint32_t bufferUsage,uint32_t mapFlags,int caseFlags)3335 MapBufferRangeFlushCase::MapBufferRangeFlushCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
3336 int maxBufferSize, int numSamples, uint32_t bufferUsage,
3337 uint32_t mapFlags, int caseFlags)
3338 : BasicUploadCase<MapBufferRangeFlushDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples,
3339 bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE,
3340 getBaseFlags(mapFlags, caseFlags))
3341 , m_fullUpload((caseFlags & FLAG_PARTIAL) == 0)
3342 , m_flushInParts((caseFlags & FLAG_FLUSH_IN_PARTS) != 0)
3343 , m_flushPartial((caseFlags & FLAG_FLUSH_PARTIAL) != 0)
3344 , m_useUnusedUnspecifiedBuffer((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
3345 , m_useUnusedSpecifiedBuffer((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
3346 , m_mapFlags(mapFlags)
3347 , m_unmapFailures(0)
3348 {
3349 DE_ASSERT(!(m_flushPartial && m_flushInParts));
3350 DE_ASSERT(!(m_flushPartial && !m_fullUpload));
3351 }
3352
~MapBufferRangeFlushCase(void)3353 MapBufferRangeFlushCase::~MapBufferRangeFlushCase(void)
3354 {
3355 }
3356
init(void)3357 void MapBufferRangeFlushCase::init(void)
3358 {
3359 // Describe what the test tries to do
3360 m_testCtx.getLog()
3361 << tcu::TestLog::Message
3362 << "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n"
3363 << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
3364 << ((m_flushInParts) ?
3365 ("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") :
3366 (m_flushPartial) ? ("Half of the buffer range is flushed.") :
3367 ("The whole mapped range is flushed in one flush call."))
3368 << "\n"
3369 << ((m_useUnusedUnspecifiedBuffer) ?
3370 ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") :
3371 (""))
3372 << ((m_useUnusedSpecifiedBuffer) ?
3373 ("The buffer has not been used before mapping and is allocated with specified contents.\n") :
3374 (""))
3375 << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ?
3376 ("The buffer has previously been used in a drawing operation.\n") :
3377 (""))
3378 << "Map bits:\n"
3379 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3380 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3381 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3382 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3383 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3384 << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3385 << tcu::TestLog::EndMessage;
3386
3387 BasicUploadCase<MapBufferRangeFlushDuration>::init();
3388 }
3389
getBaseCaseType(int caseFlags)3390 MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType(int caseFlags)
3391 {
3392 if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
3393 return CASE_USED_BUFFER;
3394 else
3395 return CASE_NEW_BUFFER;
3396 }
3397
getBaseFlags(uint32_t mapFlags,int caseFlags)3398 int MapBufferRangeFlushCase::getBaseFlags(uint32_t mapFlags, int caseFlags)
3399 {
3400 int flags = FLAG_DONT_LOG_BUFFER_INFO;
3401
3402 // If buffer contains unspecified data when it is sourced (i.e drawn)
3403 // results are undefined, and system errors may occur. Signal parent
3404 // class to take this into account
3405 if (caseFlags & FLAG_PARTIAL)
3406 {
3407 if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0 ||
3408 (caseFlags & FLAG_FLUSH_PARTIAL) != 0)
3409 {
3410 flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
3411 }
3412 }
3413
3414 return flags;
3415 }
3416
testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> & result,int bufferSize)3417 void MapBufferRangeFlushCase::testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize)
3418 {
3419 const int unmapFailureThreshold = 4;
3420
3421 for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
3422 {
3423 try
3424 {
3425 attemptBufferMap(result, bufferSize);
3426 return;
3427 }
3428 catch (UnmapFailureError &)
3429 {
3430 }
3431 }
3432
3433 throw tcu::TestError("Unmapping failures exceeded limit");
3434 }
3435
attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> & result,int bufferSize)3436 void MapBufferRangeFlushCase::attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize)
3437 {
3438 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3439 const int mappedSize = (m_fullUpload) ? (bufferSize) : (bufferSize / 2);
3440
3441 if (m_fullUpload && !m_flushPartial)
3442 result.writtenSize = bufferSize;
3443 else
3444 result.writtenSize = bufferSize / 2;
3445
3446 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3447
3448 // Create unused buffer
3449
3450 if (m_useUnusedUnspecifiedBuffer)
3451 {
3452 uint64_t startTime;
3453 uint64_t endTime;
3454
3455 // Don't specify contents
3456 startTime = deGetMicroseconds();
3457 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3458 endTime = deGetMicroseconds();
3459
3460 result.duration.allocDuration = endTime - startTime;
3461 }
3462 else if (m_useUnusedSpecifiedBuffer)
3463 {
3464 uint64_t startTime;
3465 uint64_t endTime;
3466
3467 // Specify buffer contents
3468 startTime = deGetMicroseconds();
3469 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3470 endTime = deGetMicroseconds();
3471
3472 result.duration.allocDuration = endTime - startTime;
3473 }
3474 else
3475 {
3476 // No alloc, no time
3477 result.duration.allocDuration = 0;
3478 }
3479
3480 // upload
3481 {
3482 void *mapPtr;
3483
3484 // Map
3485 {
3486 uint64_t startTime;
3487 uint64_t endTime;
3488
3489 startTime = deGetMicroseconds();
3490 if (m_fullUpload)
3491 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags);
3492 else
3493 {
3494 // upload to buffer center
3495 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags);
3496 }
3497 endTime = deGetMicroseconds();
3498
3499 if (!mapPtr)
3500 throw tcu::Exception("MapBufferRange returned NULL");
3501
3502 result.duration.mapDuration = endTime - startTime;
3503 }
3504
3505 // Write
3506 {
3507 if (!m_flushPartial)
3508 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
3509 else
3510 result.duration.writeDuration =
3511 medianTimeMemcpy((uint8_t *)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize);
3512 }
3513
3514 // Flush
3515 {
3516 uint64_t startTime;
3517 uint64_t endTime;
3518
3519 startTime = deGetMicroseconds();
3520
3521 if (m_flushPartial)
3522 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize / 4, mappedSize / 2);
3523 else if (!m_flushInParts)
3524 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize);
3525 else
3526 {
3527 const int p1 = 0;
3528 const int p2 = mappedSize / 3;
3529 const int p3 = mappedSize / 2;
3530 const int p4 = mappedSize * 2 / 4;
3531 const int p5 = mappedSize;
3532
3533 // flush in mixed order
3534 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2, p3 - p2);
3535 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1, p2 - p1);
3536 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4, p5 - p4);
3537 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3, p4 - p3);
3538 }
3539
3540 endTime = deGetMicroseconds();
3541
3542 result.duration.flushDuration = endTime - startTime;
3543 }
3544
3545 // Unmap
3546 {
3547 uint64_t startTime;
3548 uint64_t endTime;
3549 glw::GLboolean unmapSuccessful;
3550
3551 startTime = deGetMicroseconds();
3552 unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
3553 endTime = deGetMicroseconds();
3554
3555 // if unmapping fails, just try again later
3556 if (!unmapSuccessful)
3557 throw UnmapFailureError();
3558
3559 result.duration.unmapDuration = endTime - startTime;
3560 }
3561
3562 result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
3563 result.duration.flushDuration + result.duration.unmapDuration +
3564 result.duration.allocDuration;
3565 result.duration.fitResponseDuration = result.duration.totalDuration;
3566 }
3567 }
3568
// Common base for cases that time a buffer modification performed after the
// buffer has already been uploaded and drawn from.
//
// For each sample, prepareAndRunTest() creates a buffer, uploads initial
// data, draws from it and then calls testWithBufferSize(), which subclasses
// implement with the modification to be measured.
template <typename SampleType>
class ModifyAfterBasicCase : public BasicBufferCase<SampleType>
{
public:
    ModifyAfterBasicCase(Context &context, const char *name, const char *description, int bufferSizeMin,
                         int bufferSizeMax, uint32_t usage, bool bufferUnspecifiedAfterTest);
    ~ModifyAfterBasicCase(void);

    void init(void);
    void deinit(void);

protected:
    // Issues draw calls that source the first and the last vec4 of the byte range [begin, end).
    void drawBufferRange(int begin, int end);

private:
    enum
    {
        NUM_SAMPLES = 20, // sample count handed to BasicBufferCase
    };

    // Runs one sample; returns false when the sample has to be retried after an unmapping error.
    // NOTE(review): presumably these two implement hooks declared by BasicBufferCase - confirm.
    bool runSample(int iteration, UploadSampleResult<SampleType> &sample);
    // Sets up a used buffer, runs testWithBufferSize() and cleans up; false on unmap failure.
    bool prepareAndRunTest(int iteration, UploadSampleResult<SampleType> &result, int bufferSize);
    // Analyzes the samples and reports the median transfer rate as the test result.
    void logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results);

    // The measured modification; implemented by subclasses.
    virtual void testWithBufferSize(UploadSampleResult<SampleType> &result, int bufferSize) = 0;

    int m_unmappingErrors; // unmapping errors seen so far, see runSample()

protected:
    const bool m_bufferUnspecifiedAfterTest; // respecify buffer contents before the cleanup draw
    const uint32_t m_bufferUsage;            // usage argument for glBufferData()
    std::vector<uint8_t> m_zeroData;         // zero-filled upload source, sized in init()

    // Make the dependent base class members visible without this-> qualification.
    using BasicBufferCase<SampleType>::m_testCtx;
    using BasicBufferCase<SampleType>::m_context;

    using BasicBufferCase<SampleType>::UNUSED_RENDER_AREA_SIZE;
    using BasicBufferCase<SampleType>::m_minimalProgram;
    using BasicBufferCase<SampleType>::m_minimalProgramPosLoc;
    using BasicBufferCase<SampleType>::m_bufferID;
    using BasicBufferCase<SampleType>::m_numSamples;
    using BasicBufferCase<SampleType>::m_bufferSizeMin;
    using BasicBufferCase<SampleType>::m_bufferSizeMax;
    using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
};
3614
3615 template <typename SampleType>
ModifyAfterBasicCase(Context & context,const char * name,const char * description,int bufferSizeMin,int bufferSizeMax,uint32_t usage,bool bufferUnspecifiedAfterTest)3616 ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase(Context &context, const char *name, const char *description,
3617 int bufferSizeMin, int bufferSizeMax, uint32_t usage,
3618 bool bufferUnspecifiedAfterTest)
3619 : BasicBufferCase<SampleType>(context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0)
3620 , m_unmappingErrors(0)
3621 , m_bufferUnspecifiedAfterTest(bufferUnspecifiedAfterTest)
3622 , m_bufferUsage(usage)
3623 , m_zeroData()
3624 {
3625 }
3626
3627 template <typename SampleType>
~ModifyAfterBasicCase(void)3628 ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase(void)
3629 {
3630 BasicBufferCase<SampleType>::deinit();
3631 }
3632
3633 template <typename SampleType>
init(void)3634 void ModifyAfterBasicCase<SampleType>::init(void)
3635 {
3636 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3637
3638 // init parent
3639
3640 BasicBufferCase<SampleType>::init();
3641
3642 // upload source
3643 m_zeroData.resize(m_bufferSizeMax, 0x00);
3644
3645 // log basic info
3646
3647 m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance with " << (int)NUM_SAMPLES
3648 << " test samples. Sample order is randomized. All samples at even positions (first = 0) are "
3649 "tested before samples at odd positions.\n"
3650 << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", "
3651 << getHumanReadableByteSize(m_bufferSizeMax) << "]." << tcu::TestLog::EndMessage;
3652
3653 // log which transfer rate is the test result and buffer info
3654
3655 m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples.\n"
3656 << "Buffer usage = " << glu::getUsageName(m_bufferUsage) << tcu::TestLog::EndMessage;
3657
3658 // Set state for drawing so that we don't have to change these during the iteration
3659 {
3660 gl.useProgram(m_minimalProgram->getProgram());
3661 gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
3662 gl.enableVertexAttribArray(m_minimalProgramPosLoc);
3663 }
3664 }
3665
3666 template <typename SampleType>
deinit(void)3667 void ModifyAfterBasicCase<SampleType>::deinit(void)
3668 {
3669 m_zeroData = std::vector<uint8_t>();
3670
3671 BasicBufferCase<SampleType>::deinit();
3672 }
3673
3674 template <typename SampleType>
drawBufferRange(int begin,int end)3675 void ModifyAfterBasicCase<SampleType>::drawBufferRange(int begin, int end)
3676 {
3677 DE_ASSERT(begin % (int)sizeof(float[4]) == 0);
3678 DE_ASSERT(end % (int)sizeof(float[4]) == 0);
3679
3680 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3681
3682 // use given range
3683 gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1);
3684 gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1);
3685 }
3686
3687 template <typename SampleType>
runSample(int iteration,UploadSampleResult<SampleType> & sample)3688 bool ModifyAfterBasicCase<SampleType>::runSample(int iteration, UploadSampleResult<SampleType> &sample)
3689 {
3690 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3691 const int bufferSize = sample.bufferSize;
3692 bool testOk;
3693
3694 testOk = prepareAndRunTest(iteration, sample, bufferSize);
3695 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
3696
3697 if (!testOk)
3698 {
3699 const int unmapFailureThreshold = 4;
3700
3701 // only unmapping error can cause iteration failure
3702 if (++m_unmappingErrors >= unmapFailureThreshold)
3703 throw tcu::TestError("Too many unmapping errors, cannot continue.");
3704
3705 // just try again
3706 return false;
3707 }
3708
3709 return true;
3710 }
3711
3712 template <typename SampleType>
prepareAndRunTest(int iteration,UploadSampleResult<SampleType> & result,int bufferSize)3713 bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest(int iteration, UploadSampleResult<SampleType> &result,
3714 int bufferSize)
3715 {
3716 DE_UNREF(iteration);
3717
3718 DE_ASSERT(!m_bufferID);
3719 DE_ASSERT(deIsAligned32(bufferSize, 4 * 4)); // aligned to vec4
3720
3721 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3722 bool testRunOk = true;
3723 bool unmappingFailed = false;
3724
3725 // Upload initial buffer to the GPU...
3726 gl.genBuffers(1, &m_bufferID);
3727 gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3728 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3729
3730 // ...use it...
3731 gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
3732 drawBufferRange(0, bufferSize);
3733
3734 // ..and make sure it is uploaded
3735 BasicBufferCase<SampleType>::waitGLResults();
3736
3737 // warmup CPU before the test to make sure the power management governor
3738 // keeps us in the "high performance" mode
3739 {
3740 deYield();
3741 tcu::warmupCPU();
3742 deYield();
3743 }
3744
3745 // test
3746 try
3747 {
3748 // buffer is uploaded to the GPU. Draw from it.
3749 drawBufferRange(0, bufferSize);
3750
3751 // and test upload
3752 testWithBufferSize(result, bufferSize);
3753 }
3754 catch (UnmapFailureError &)
3755 {
3756 testRunOk = false;
3757 unmappingFailed = true;
3758 }
3759
3760 // clean up: make sure buffer is not in upload queue and delete it
3761
3762 // sourcing unspecified data causes undefined results, possibly program termination
3763 if (m_bufferUnspecifiedAfterTest || unmappingFailed)
3764 gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3765
3766 drawBufferRange(0, bufferSize);
3767 BasicBufferCase<SampleType>::waitGLResults();
3768
3769 gl.deleteBuffers(1, &m_bufferID);
3770 m_bufferID = 0;
3771
3772 return testRunOk;
3773 }
3774
3775 template <typename SampleType>
logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> & results)3776 void ModifyAfterBasicCase<SampleType>::logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results)
3777 {
3778 const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false);
3779
3780 // Return median transfer rate of the samples
3781
3782 if (analysis.transferRateMedian == std::numeric_limits<float>::infinity())
3783 {
3784 // sample times are 1) invalid or 2) timer resolution too low
3785 // report speed 0 bytes / s since real value cannot be determined
3786 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
3787 }
3788 else
3789 {
3790 // report transfer rate in MB / s
3791 m_testCtx.setTestResult(QP_TEST_RESULT_PASS,
3792 de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str());
3793 }
3794 }
3795
// Measures glBufferData() on a buffer that has already been drawn from.
//
// The flags select whether the buffer is respecified with a larger size and
// whether an "upload - draw" pair is repeated before the timed upload.
class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
{
public:
    // The two flags cannot be combined (asserted in the constructor).
    enum CaseFlags
    {
        FLAG_RESPECIFY_SIZE = 0x1,  // respecify the buffer with an increased size
        FLAG_UPLOAD_REPEATED = 0x2, // repeat "specify contents - draw" before the timed upload
    };

    ModifyAfterWithBufferDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                  int bufferSizeMax, uint32_t usage, int flags);
    ~ModifyAfterWithBufferDataCase(void);

    void init(void);
    void deinit(void);

private:
    // Times one glBufferData() call and stores the sample in result.
    void testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);

    enum
    {
        NUM_REPEATS = 2 // number of untimed "upload - draw" repeats
    };

    const bool m_respecifySize;         // FLAG_RESPECIFY_SIZE given
    const bool m_repeatedUpload;        // FLAG_UPLOAD_REPEATED given
    const float m_sizeDifferenceFactor; // size multiplier used when respecifying with a larger size
};
3824
ModifyAfterWithBufferDataCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,uint32_t usage,int flags)3825 ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase(Context &context, const char *name, const char *desc,
3826 int bufferSizeMin, int bufferSizeMax, uint32_t usage,
3827 int flags)
3828 : ModifyAfterBasicCase<SingleOperationDuration>(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3829 , m_respecifySize((flags & FLAG_RESPECIFY_SIZE) != 0)
3830 , m_repeatedUpload((flags & FLAG_UPLOAD_REPEATED) != 0)
3831 , m_sizeDifferenceFactor(1.3f)
3832 {
3833 DE_ASSERT(!(m_repeatedUpload && m_respecifySize));
3834 }
3835
~ModifyAfterWithBufferDataCase(void)3836 ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase(void)
3837 {
3838 deinit();
3839 }
3840
init(void)3841 void ModifyAfterWithBufferDataCase::init(void)
3842 {
3843 // Log the purpose of the test
3844
3845 if (m_repeatedUpload)
3846 m_testCtx.getLog() << tcu::TestLog::Message
3847 << "Testing performance of BufferData() command after \"specify buffer contents - draw "
3848 "buffer\" command pair is repeated "
3849 << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3850 else
3851 m_testCtx.getLog() << tcu::TestLog::Message
3852 << "Testing performance of BufferData() command after a draw command that sources data from "
3853 "the target buffer."
3854 << tcu::TestLog::EndMessage;
3855
3856 m_testCtx.getLog() << tcu::TestLog::Message
3857 << ((m_respecifySize) ?
3858 ("Buffer size is increased and contents are modified with BufferData().\n") :
3859 ("Buffer contents are modified with BufferData().\n"))
3860 << tcu::TestLog::EndMessage;
3861
3862 // init parent
3863 ModifyAfterBasicCase<SingleOperationDuration>::init();
3864
3865 // make sure our zeroBuffer is large enough
3866 if (m_respecifySize)
3867 {
3868 const int largerBufferSize = deAlign32((int)((float)m_bufferSizeMax * m_sizeDifferenceFactor), 4 * 4);
3869 m_zeroData.resize(largerBufferSize, 0x00);
3870 }
3871 }
3872
deinit(void)3873 void ModifyAfterWithBufferDataCase::deinit(void)
3874 {
3875 ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3876 }
3877
testWithBufferSize(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3878 void ModifyAfterWithBufferDataCase::testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result,
3879 int bufferSize)
3880 {
3881 // always draw the same amount to make compares between cases sensible
3882 const int drawStart = deAlign32(bufferSize / 4, 4 * 4);
3883 const int drawEnd = deAlign32(bufferSize * 3 / 4, 4 * 4);
3884
3885 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3886 const int largerBufferSize = deAlign32((int)((float)bufferSize * m_sizeDifferenceFactor), 4 * 4);
3887 const int newBufferSize = (m_respecifySize) ? (largerBufferSize) : (bufferSize);
3888 uint64_t startTime;
3889 uint64_t endTime;
3890
3891 // repeat upload-draw
3892 if (m_repeatedUpload)
3893 {
3894 for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3895 {
3896 gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3897 drawBufferRange(drawStart, drawEnd);
3898 }
3899 }
3900
3901 // test upload
3902 startTime = deGetMicroseconds();
3903 gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3904 endTime = deGetMicroseconds();
3905
3906 result.duration.totalDuration = endTime - startTime;
3907 result.duration.fitResponseDuration = result.duration.totalDuration;
3908 result.writtenSize = newBufferSize;
3909 }
3910
// Performance case that times BufferSubData() on a buffer after a draw call has sourced data
// from it. Behavior is selected with CaseFlags passed to the constructor.
class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
{
public:
    enum CaseFlags
    {
        // Upload only half of the buffer (starting at a quarter of its size) instead of all of it.
        FLAG_PARTIAL = 0x1,
        // Issue NUM_REPEATS "upload - draw" pairs before the measured upload.
        FLAG_UPLOAD_REPEATED = 0x2,
    };

    ModifyAfterWithBufferSubDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                     int bufferSizeMax, uint32_t usage, int flags);
    ~ModifyAfterWithBufferSubDataCase(void);

    // init() logs the purpose of the test and initializes the parent; deinit() forwards to the parent.
    void init(void);
    void deinit(void);

private:
    // Measures one BufferSubData() call for the given buffer size and fills in `result`.
    void testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);

    enum
    {
        // Number of upload + draw pairs issued before the measurement when FLAG_UPLOAD_REPEATED is set.
        NUM_REPEATS = 2
    };

    const bool m_partialUpload;  // FLAG_PARTIAL was given
    const bool m_repeatedUpload; // FLAG_UPLOAD_REPEATED was given
};
3938
ModifyAfterWithBufferSubDataCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,uint32_t usage,int flags)3939 ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase(Context &context, const char *name, const char *desc,
3940 int bufferSizeMin, int bufferSizeMax, uint32_t usage,
3941 int flags)
3942 : ModifyAfterBasicCase<SingleOperationDuration>(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3943 , m_partialUpload((flags & FLAG_PARTIAL) != 0)
3944 , m_repeatedUpload((flags & FLAG_UPLOAD_REPEATED) != 0)
3945 {
3946 }
3947
~ModifyAfterWithBufferSubDataCase(void)3948 ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase(void)
3949 {
3950 deinit();
3951 }
3952
init(void)3953 void ModifyAfterWithBufferSubDataCase::init(void)
3954 {
3955 // Log the purpose of the test
3956
3957 if (m_repeatedUpload)
3958 m_testCtx.getLog() << tcu::TestLog::Message
3959 << "Testing performance of BufferSubData() command after \"specify buffer contents - draw "
3960 "buffer\" command pair is repeated "
3961 << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3962 else
3963 m_testCtx.getLog() << tcu::TestLog::Message
3964 << "Testing performance of BufferSubData() command after a draw command that sources data "
3965 "from the target buffer."
3966 << tcu::TestLog::EndMessage;
3967
3968 m_testCtx.getLog() << tcu::TestLog::Message
3969 << ((m_partialUpload) ? ("Half of the buffer contents are modified.\n") :
3970 ("Buffer contents are fully respecified.\n"))
3971 << tcu::TestLog::EndMessage;
3972
3973 ModifyAfterBasicCase<SingleOperationDuration>::init();
3974 }
3975
deinit(void)3976 void ModifyAfterWithBufferSubDataCase::deinit(void)
3977 {
3978 ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3979 }
3980
testWithBufferSize(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3981 void ModifyAfterWithBufferSubDataCase::testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result,
3982 int bufferSize)
3983 {
3984 // always draw the same amount to make compares between cases sensible
3985 const int drawStart = deAlign32(bufferSize / 4, 4 * 4);
3986 const int drawEnd = deAlign32(bufferSize * 3 / 4, 4 * 4);
3987
3988 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3989 const int subdataOffset = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
3990 const int subdataSize = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4 * 4);
3991 uint64_t startTime;
3992 uint64_t endTime;
3993
3994 // make upload-draw stream
3995 if (m_repeatedUpload)
3996 {
3997 for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3998 {
3999 gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
4000 drawBufferRange(drawStart, drawEnd);
4001 }
4002 }
4003
4004 // test upload
4005 startTime = deGetMicroseconds();
4006 gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
4007 endTime = deGetMicroseconds();
4008
4009 result.duration.totalDuration = endTime - startTime;
4010 result.duration.fitResponseDuration = result.duration.totalDuration;
4011 result.writtenSize = subdataSize;
4012 }
4013
// Performance case that times a MapBufferRange() - write - UnmapBuffer() sequence on a buffer
// after a draw call has sourced data from it.
class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>
{
public:
    enum CaseFlags
    {
        // Map only half of the buffer (starting at a quarter of its size) instead of all of it.
        FLAG_PARTIAL = 0x1,
    };

    // flags is a CaseFlags bitmask; glMapFlags are the access bits handed to MapBufferRange().
    ModifyAfterWithMapBufferRangeCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                      int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags);
    ~ModifyAfterWithMapBufferRangeCase(void);

    // init() logs the purpose of the test and initializes the parent; deinit() forwards to the parent.
    void init(void);
    void deinit(void);

private:
    // True when a partial upload is combined with GL_MAP_INVALIDATE_BUFFER_BIT; the value is
    // forwarded to the parent constructor.
    static bool isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags);
    // Measures map, write and unmap durations for the given buffer size and fills in `result`.
    void testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> &result, int bufferSize);

    const bool m_partialUpload; // FLAG_PARTIAL was given
    const uint32_t m_mapFlags;  // access bits passed to MapBufferRange()
};
4036
ModifyAfterWithMapBufferRangeCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,uint32_t usage,int flags,uint32_t glMapFlags)4037 ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase(Context &context, const char *name,
4038 const char *desc, int bufferSizeMin,
4039 int bufferSizeMax, uint32_t usage, int flags,
4040 uint32_t glMapFlags)
4041 : ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>(context, name, desc, bufferSizeMin, bufferSizeMax, usage,
4042 isBufferUnspecifiedAfterUpload(flags, glMapFlags))
4043 , m_partialUpload((flags & FLAG_PARTIAL) != 0)
4044 , m_mapFlags(glMapFlags)
4045 {
4046 }
4047
~ModifyAfterWithMapBufferRangeCase(void)4048 ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase(void)
4049 {
4050 deinit();
4051 }
4052
init(void)4053 void ModifyAfterWithMapBufferRangeCase::init(void)
4054 {
4055 // Log the purpose of the test
4056
4057 m_testCtx.getLog() << tcu::TestLog::Message
4058 << "Testing performance of MapBufferRange() command after a draw command that sources data from "
4059 "the target buffer.\n"
4060 << ((m_partialUpload) ? ("Half of the buffer is mapped.\n") : ("Whole buffer is mapped.\n"))
4061 << "Map bits:\n"
4062 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
4063 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
4064 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
4065 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
4066 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
4067 << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
4068 << tcu::TestLog::EndMessage;
4069
4070 ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init();
4071 }
4072
deinit(void)4073 void ModifyAfterWithMapBufferRangeCase::deinit(void)
4074 {
4075 ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit();
4076 }
4077
isBufferUnspecifiedAfterUpload(int flags,uint32_t mapFlags)4078 bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags)
4079 {
4080 if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
4081 return true;
4082
4083 return false;
4084 }
4085
testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> & result,int bufferSize)4086 void ModifyAfterWithMapBufferRangeCase::testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> &result,
4087 int bufferSize)
4088 {
4089 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4090 const int subdataOffset = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
4091 const int subdataSize = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4 * 4);
4092 void *mapPtr;
4093
4094 // map
4095 {
4096 uint64_t startTime;
4097 uint64_t endTime;
4098
4099 startTime = deGetMicroseconds();
4100 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
4101 endTime = deGetMicroseconds();
4102
4103 if (!mapPtr)
4104 throw tcu::TestError("mapBufferRange returned null");
4105
4106 result.duration.mapDuration = endTime - startTime;
4107 }
4108
4109 // write
4110 {
4111 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
4112 }
4113
4114 // unmap
4115 {
4116 uint64_t startTime;
4117 uint64_t endTime;
4118 glw::GLboolean unmapSucceeded;
4119
4120 startTime = deGetMicroseconds();
4121 unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
4122 endTime = deGetMicroseconds();
4123
4124 if (unmapSucceeded != GL_TRUE)
4125 throw UnmapFailureError();
4126
4127 result.duration.unmapDuration = endTime - startTime;
4128 }
4129
4130 result.duration.totalDuration =
4131 result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration;
4132 result.duration.fitResponseDuration = result.duration.totalDuration;
4133 result.writtenSize = subdataSize;
4134 }
4135
// Performance case that times a MapBufferRange() - write - FlushMappedBufferRange() -
// UnmapBuffer() sequence on a buffer after a draw call has sourced data from it.
class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>
{
public:
    enum CaseFlags
    {
        // Map only half of the buffer (starting at a quarter of its size) instead of all of it.
        FLAG_PARTIAL = 0x1,
    };

    // flags is a CaseFlags bitmask; glMapFlags are the access bits handed to MapBufferRange().
    ModifyAfterWithMapBufferFlushCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                      int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags);
    ~ModifyAfterWithMapBufferFlushCase(void);

    // init() logs the purpose of the test and initializes the parent; deinit() forwards to the parent.
    void init(void);
    void deinit(void);

private:
    // True when a partial upload is combined with GL_MAP_INVALIDATE_BUFFER_BIT; the value is
    // forwarded to the parent constructor.
    static bool isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags);
    // Measures map, write, flush and unmap durations for the given buffer size and fills in `result`.
    void testWithBufferSize(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> &result, int bufferSize);

    const bool m_partialUpload; // FLAG_PARTIAL was given
    const uint32_t m_mapFlags;  // access bits passed to MapBufferRange()
};
4158
ModifyAfterWithMapBufferFlushCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,uint32_t usage,int flags,uint32_t glMapFlags)4159 ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase(Context &context, const char *name,
4160 const char *desc, int bufferSizeMin,
4161 int bufferSizeMax, uint32_t usage, int flags,
4162 uint32_t glMapFlags)
4163 : ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>(context, name, desc, bufferSizeMin, bufferSizeMax, usage,
4164 isBufferUnspecifiedAfterUpload(flags, glMapFlags))
4165 , m_partialUpload((flags & FLAG_PARTIAL) != 0)
4166 , m_mapFlags(glMapFlags)
4167 {
4168 }
4169
~ModifyAfterWithMapBufferFlushCase(void)4170 ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase(void)
4171 {
4172 deinit();
4173 }
4174
init(void)4175 void ModifyAfterWithMapBufferFlushCase::init(void)
4176 {
4177 // Log the purpose of the test
4178
4179 m_testCtx.getLog() << tcu::TestLog::Message
4180 << "Testing performance of MapBufferRange() command after a draw command that sources data from "
4181 "the target buffer.\n"
4182 << ((m_partialUpload) ? ("Half of the buffer is mapped.\n") : ("Whole buffer is mapped.\n"))
4183 << "Map bits:\n"
4184 << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
4185 << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
4186 << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
4187 << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
4188 << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
4189 << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
4190 << tcu::TestLog::EndMessage;
4191
4192 ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init();
4193 }
4194
deinit(void)4195 void ModifyAfterWithMapBufferFlushCase::deinit(void)
4196 {
4197 ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit();
4198 }
4199
isBufferUnspecifiedAfterUpload(int flags,uint32_t mapFlags)4200 bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags)
4201 {
4202 if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
4203 return true;
4204
4205 return false;
4206 }
4207
testWithBufferSize(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> & result,int bufferSize)4208 void ModifyAfterWithMapBufferFlushCase::testWithBufferSize(
4209 UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> &result, int bufferSize)
4210 {
4211 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4212 const int subdataOffset = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
4213 const int subdataSize = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4 * 4);
4214 void *mapPtr;
4215
4216 // map
4217 {
4218 uint64_t startTime;
4219 uint64_t endTime;
4220
4221 startTime = deGetMicroseconds();
4222 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
4223 endTime = deGetMicroseconds();
4224
4225 if (!mapPtr)
4226 throw tcu::TestError("mapBufferRange returned null");
4227
4228 result.duration.mapDuration = endTime - startTime;
4229 }
4230
4231 // write
4232 {
4233 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
4234 }
4235
4236 // flush
4237 {
4238 uint64_t startTime;
4239 uint64_t endTime;
4240
4241 startTime = deGetMicroseconds();
4242 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize);
4243 endTime = deGetMicroseconds();
4244
4245 result.duration.flushDuration = endTime - startTime;
4246 }
4247
4248 // unmap
4249 {
4250 uint64_t startTime;
4251 uint64_t endTime;
4252 glw::GLboolean unmapSucceeded;
4253
4254 startTime = deGetMicroseconds();
4255 unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
4256 endTime = deGetMicroseconds();
4257
4258 if (unmapSucceeded != GL_TRUE)
4259 throw UnmapFailureError();
4260
4261 result.duration.unmapDuration = endTime - startTime;
4262 }
4263
4264 result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
4265 result.duration.unmapDuration + result.duration.flushDuration;
4266 result.duration.fitResponseDuration = result.duration.totalDuration;
4267 result.writtenSize = subdataSize;
4268 }
4269
// Draw call used by the render cases.
enum DrawMethod
{
    DRAWMETHOD_DRAW_ARRAYS = 0,   // non-indexed draw ("drawArrays")
    DRAWMETHOD_DRAW_ELEMENTS = 1, // indexed draw ("drawElements"); an index buffer is uploaded as well

    DRAWMETHOD_LAST // number of draw methods; valid values are strictly below this
};
4277
// Buffer kind a case targets. NOTE(review): the users of this enum are outside this section;
// presumably it selects between the vertex attribute buffer and the index buffer - confirm.
enum TargetBuffer
{
    TARGETBUFFER_VERTEX = 0,
    TARGETBUFFER_INDEX = 1,

    TARGETBUFFER_LAST // number of target buffer kinds
};
4285
// State of the buffer a case operates on. NOTE(review): the users of this enum are outside this
// section; presumably it tells a freshly created buffer from an already existing one - confirm.
enum BufferState
{
    BUFFERSTATE_NEW = 0,
    BUFFERSTATE_EXISTING = 1,

    BUFFERSTATE_LAST // number of buffer states
};
4293
// Way in which data is uploaded to a buffer. NOTE(review): the users of this enum are outside
// this section; the names suggest BufferData(), BufferSubData() and MapBufferRange() - confirm.
enum UploadMethod
{
    UPLOADMETHOD_BUFFER_DATA = 0,
    UPLOADMETHOD_BUFFER_SUB_DATA = 1,
    UPLOADMETHOD_MAP_BUFFER_RANGE = 2,

    UPLOADMETHOD_LAST // number of upload methods
};
4302
// Kind of unrelated buffer involved in a case, if any. NOTE(review): the users of this enum are
// outside this section; presumably "unrelated" means a buffer not used by the draw call - confirm.
enum UnrelatedBufferType
{
    UNRELATEDBUFFERTYPE_NONE = 0,
    UNRELATEDBUFFERTYPE_VERTEX = 1,

    UNRELATEDBUFFERTYPE_LAST // number of unrelated buffer types
};
4310
// Portion of a buffer covered by an upload. NOTE(review): the users of this enum are outside
// this section; presumably whole buffer vs. a sub-range - confirm.
enum UploadRange
{
    UPLOADRANGE_FULL = 0,
    UPLOADRANGE_PARTIAL = 1,

    UPLOADRANGE_LAST // number of upload ranges
};
4318
// Dimensions of the rendering workload: gridLayers grids of gridWidth x gridHeight cells each.
// The generated geometry uses six vertices (two triangles) per cell.
struct LayeredGridSpec
{
    int gridWidth;  // number of cells along x
    int gridHeight; // number of cells along y
    int gridLayers; // number of grids; all layers of a cell share the same position
};
4325
getLayeredGridNumVertices(const LayeredGridSpec & scene)4326 static int getLayeredGridNumVertices(const LayeredGridSpec &scene)
4327 {
4328 return scene.gridWidth * scene.gridHeight * scene.gridLayers * 6;
4329 }
4330
generateLayeredGridVertexAttribData4C4V(std::vector<tcu::Vec4> & vertexData,const LayeredGridSpec & scene)4331 static void generateLayeredGridVertexAttribData4C4V(std::vector<tcu::Vec4> &vertexData, const LayeredGridSpec &scene)
4332 {
4333 // interleave color & vertex data
4334 const tcu::Vec4 green(0.0f, 1.0f, 0.0f, 0.7f);
4335 const tcu::Vec4 yellow(1.0f, 1.0f, 0.0f, 0.8f);
4336
4337 vertexData.resize(getLayeredGridNumVertices(scene) * 2);
4338
4339 for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
4340 for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
4341 for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
4342 {
4343 const tcu::Vec4 color = (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow);
4344 const float cellLeft = (float(cellX) / (float)scene.gridWidth - 0.5f) * 2.0f;
4345 const float cellRight = (float(cellX + 1) / (float)scene.gridWidth - 0.5f) * 2.0f;
4346 const float cellTop = (float(cellY + 1) / (float)scene.gridHeight - 0.5f) * 2.0f;
4347 const float cellBottom = (float(cellY) / (float)scene.gridHeight - 0.5f) * 2.0f;
4348
4349 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 0] =
4350 color;
4351 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 1] =
4352 tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
4353
4354 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 2] =
4355 color;
4356 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 3] =
4357 tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f);
4358
4359 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 4] =
4360 color;
4361 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 5] =
4362 tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
4363
4364 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 6] =
4365 color;
4366 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 7] =
4367 tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
4368
4369 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 8] =
4370 color;
4371 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 9] =
4372 tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
4373
4374 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] =
4375 color;
4376 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] =
4377 tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f);
4378 }
4379 }
4380
generateLayeredGridIndexData(std::vector<uint32_t> & indexData,const LayeredGridSpec & scene)4381 static void generateLayeredGridIndexData(std::vector<uint32_t> &indexData, const LayeredGridSpec &scene)
4382 {
4383 indexData.resize(getLayeredGridNumVertices(scene) * 2);
4384
4385 for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx)
4386 indexData[ndx] = ndx;
4387 }
4388
// Common base for the render performance cases: owns the color shader program and the
// attribute locations, and offers helpers to set up vertex attributes and to wait for GL results.
class RenderPerformanceTestBase : public TestCase
{
public:
    RenderPerformanceTestBase(Context &context, const char *name, const char *description);
    ~RenderPerformanceTestBase(void);

protected:
    // init() builds the render program and queries the attribute locations; deinit() deletes the program.
    void init(void);
    void deinit(void);

    // Reads back a RENDER_AREA_SIZE x RENDER_AREA_SIZE area with readPixels and discards the result.
    void waitGLResults(void) const;
    // Enables and points the a_color / a_position attributes at the currently bound,
    // interleaved buffer and binds the render program.
    void setupVertexAttribs(void) const;

    enum
    {
        // Side length, in pixels, of the square area that is rendered to and read back.
        RENDER_AREA_SIZE = 128
    };

private:
    glu::ShaderProgram *m_renderProgram; // owned; created in init(), deleted in deinit()
    int m_colorLoc;                      // location of attribute "a_color"
    int m_positionLoc;                   // location of attribute "a_position"
};
4412
// Registers the case as a performance test node. No GL objects are created here; the
// program and attribute locations are set up in init().
RenderPerformanceTestBase::RenderPerformanceTestBase(Context &context, const char *name, const char *description)
    : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
    , m_renderProgram(DE_NULL)
    , m_colorLoc(0)
    , m_positionLoc(0)
{
}
4420
~RenderPerformanceTestBase(void)4421 RenderPerformanceTestBase::~RenderPerformanceTestBase(void)
4422 {
4423 deinit();
4424 }
4425
init(void)4426 void RenderPerformanceTestBase::init(void)
4427 {
4428 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4429
4430 m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(),
4431 glu::ProgramSources() << glu::VertexSource(s_colorVertexShader)
4432 << glu::FragmentSource(s_colorFragmentShader));
4433 if (!m_renderProgram->isOk())
4434 {
4435 m_testCtx.getLog() << *m_renderProgram;
4436 throw tcu::TestError("could not build program");
4437 }
4438
4439 m_colorLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color");
4440 m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");
4441
4442 if (m_colorLoc == -1)
4443 throw tcu::TestError("Location of attribute a_color was -1");
4444 if (m_positionLoc == -1)
4445 throw tcu::TestError("Location of attribute a_position was -1");
4446 }
4447
deinit(void)4448 void RenderPerformanceTestBase::deinit(void)
4449 {
4450 delete m_renderProgram;
4451 m_renderProgram = DE_NULL;
4452 }
4453
setupVertexAttribs(void) const4454 void RenderPerformanceTestBase::setupVertexAttribs(void) const
4455 {
4456 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4457
4458 // buffers are bound
4459
4460 gl.enableVertexAttribArray(m_colorLoc);
4461 gl.enableVertexAttribArray(m_positionLoc);
4462
4463 gl.vertexAttribPointer(m_colorLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)),
4464 glu::BufferOffsetAsPointer(0 * sizeof(tcu::Vec4)));
4465 gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)),
4466 glu::BufferOffsetAsPointer(1 * sizeof(tcu::Vec4)));
4467
4468 gl.useProgram(m_renderProgram->getProgram());
4469
4470 GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
4471 }
4472
waitGLResults(void) const4473 void RenderPerformanceTestBase::waitGLResults(void) const
4474 {
4475 tcu::Surface unusedSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4476 glu::readPixels(m_context.getRenderContext(), 0, 0, unusedSurface.getAccess());
4477 }
4478
// Base for render performance cases that measure a series of layered-grid workloads of
// different sizes. One sample is run per iterate() call, in randomized order; subclasses
// implement runSample() to do the actual measurement.
template <typename SampleType>
class RenderCase : public RenderPerformanceTestBase
{
public:
    RenderCase(Context &context, const char *name, const char *description, DrawMethod drawMethod);
    ~RenderCase(void);

protected:
    // init() sets up GL state and the list of samples; deinit() deletes the buffers created by uploadScene().
    void init(void);
    void deinit(void);

private:
    // Runs the next sample; logs and reports the result after the last one.
    IterateResult iterate(void);

protected:
    // One workload (scene) together with the measurement taken for it.
    struct SampleResult
    {
        LayeredGridSpec scene;
        RenderSampleResult<SampleType> result;
    };

    // Smallest / largest workload among the samples, in vertices.
    int getMinWorkloadSize(void) const;
    int getMaxWorkloadSize(void) const;
    // Smallest / largest workload among the samples, in bytes of vertex data.
    int getMinWorkloadDataSize(void) const;
    int getMaxWorkloadDataSize(void) const;
    // Size of one vertex in bytes (two vec4s).
    int getVertexDataSize(void) const;
    int getNumSamples(void) const;
    // Generates the scene's vertex data (and index data for indexed draws) and uploads it
    // with GL_STATIC_DRAW, creating the buffers on first use. Leaves the buffers bound.
    void uploadScene(const LayeredGridSpec &scene);

    // Performs the measurement for one sample; may throw UnmapFailureError, in which case
    // iterate() runs the same sample again.
    virtual void runSample(SampleResult &sample) = 0;
    virtual void logAndSetTestResult(const std::vector<SampleResult> &results);

    // Copies the `result` member of every sample into dst.
    void mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> &dst,
                                      const std::vector<SampleResult> &src) const;

    const DrawMethod m_drawMethod;

private:
    glw::GLuint m_attributeBufferID;     // 0 until uploadScene() creates it
    glw::GLuint m_indexBufferID;         // 0 until uploadScene() creates it (indexed draws only)
    int m_iterationNdx;                  // position in m_iterationOrder of the next sample to run
    std::vector<int> m_iterationOrder;   // indices into m_results, in execution order
    std::vector<SampleResult> m_results; // one entry per workload, filled in init()
    int m_numUnmapFailures;              // number of UnmapFailureErrors caught so far
};
4524
// Stores the draw method and zero-initializes buffer names and counters. GL resources and
// the sample list are created later, in init() and uploadScene().
template <typename SampleType>
RenderCase<SampleType>::RenderCase(Context &context, const char *name, const char *description, DrawMethod drawMethod)
    : RenderPerformanceTestBase(context, name, description)
    , m_drawMethod(drawMethod)
    , m_attributeBufferID(0)
    , m_indexBufferID(0)
    , m_iterationNdx(0)
    , m_numUnmapFailures(0)
{
    // DRAWMETHOD_LAST is a count, not a valid draw method
    DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
}
4536
4537 template <typename SampleType>
~RenderCase(void)4538 RenderCase<SampleType>::~RenderCase(void)
4539 {
4540 deinit();
4541 }
4542
4543 template <typename SampleType>
init(void)4544 void RenderCase<SampleType>::init(void)
4545 {
4546 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4547
4548 RenderPerformanceTestBase::init();
4549
4550 // requirements
4551
4552 if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
4553 m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
4554 throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" +
4555 de::toString<int>(RENDER_AREA_SIZE) + " render target");
4556
4557 // gl state
4558
4559 gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4560
4561 // enable bleding to prevent grid layers from being discarded
4562 gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
4563 gl.blendEquation(GL_FUNC_ADD);
4564 gl.enable(GL_BLEND);
4565
4566 // generate iterations
4567
4568 {
4569 const int gridSizes[] = {20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80, 86, 92, 98, 104, 110, 116, 122, 128};
4570
4571 for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
4572 {
4573 m_results.push_back(SampleResult());
4574
4575 m_results.back().scene.gridHeight = gridSizes[gridNdx];
4576 m_results.back().scene.gridWidth = gridSizes[gridNdx];
4577 m_results.back().scene.gridLayers = 5;
4578
4579 m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene);
4580
4581 // test cases set these, initialize to unused values
4582 m_results.back().result.renderDataSize = -1;
4583 m_results.back().result.uploadedDataSize = -1;
4584 m_results.back().result.unrelatedDataSize = -1;
4585 }
4586 }
4587
4588 // randomize iteration order
4589 {
4590 m_iterationOrder.resize(m_results.size());
4591 generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size());
4592 }
4593 }
4594
4595 template <typename SampleType>
deinit(void)4596 void RenderCase<SampleType>::deinit(void)
4597 {
4598 RenderPerformanceTestBase::deinit();
4599
4600 if (m_attributeBufferID)
4601 {
4602 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID);
4603 m_attributeBufferID = 0;
4604 }
4605
4606 if (m_indexBufferID)
4607 {
4608 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID);
4609 m_indexBufferID = 0;
4610 }
4611 }
4612
4613 template <typename SampleType>
iterate(void)4614 typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate(void)
4615 {
4616 const int unmapFailureThreshold = 3;
4617 const int currentIteration = m_iterationNdx;
4618 const int currentConfigNdx = m_iterationOrder[currentIteration];
4619 SampleResult ¤tSample = m_results[currentConfigNdx];
4620
4621 try
4622 {
4623 runSample(currentSample);
4624 ++m_iterationNdx;
4625 }
4626 catch (const UnmapFailureError &ex)
4627 {
4628 DE_UNREF(ex);
4629 ++m_numUnmapFailures;
4630 }
4631
4632 if (m_numUnmapFailures > unmapFailureThreshold)
4633 throw tcu::TestError("Got too many unmap errors");
4634
4635 if (m_iterationNdx < (int)m_iterationOrder.size())
4636 return CONTINUE;
4637
4638 logAndSetTestResult(m_results);
4639 return STOP;
4640 }
4641
4642 template <typename SampleType>
getMinWorkloadSize(void) const4643 int RenderCase<SampleType>::getMinWorkloadSize(void) const
4644 {
4645 int result = getLayeredGridNumVertices(m_results[0].scene);
4646
4647 for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4648 {
4649 const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4650 result = de::min(result, workloadSize);
4651 }
4652
4653 return result;
4654 }
4655
4656 template <typename SampleType>
getMaxWorkloadSize(void) const4657 int RenderCase<SampleType>::getMaxWorkloadSize(void) const
4658 {
4659 int result = getLayeredGridNumVertices(m_results[0].scene);
4660
4661 for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4662 {
4663 const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4664 result = de::max(result, workloadSize);
4665 }
4666
4667 return result;
4668 }
4669
4670 template <typename SampleType>
getMinWorkloadDataSize(void) const4671 int RenderCase<SampleType>::getMinWorkloadDataSize(void) const
4672 {
4673 return getMinWorkloadSize() * getVertexDataSize();
4674 }
4675
4676 template <typename SampleType>
getMaxWorkloadDataSize(void) const4677 int RenderCase<SampleType>::getMaxWorkloadDataSize(void) const
4678 {
4679 return getMaxWorkloadSize() * getVertexDataSize();
4680 }
4681
4682 template <typename SampleType>
getVertexDataSize(void) const4683 int RenderCase<SampleType>::getVertexDataSize(void) const
4684 {
4685 const int numVectors = 2;
4686 const int vec4Size = 4 * sizeof(float);
4687
4688 return numVectors * vec4Size;
4689 }
4690
4691 template <typename SampleType>
getNumSamples(void) const4692 int RenderCase<SampleType>::getNumSamples(void) const
4693 {
4694 return (int)m_results.size();
4695 }
4696
4697 template <typename SampleType>
uploadScene(const LayeredGridSpec & scene)4698 void RenderCase<SampleType>::uploadScene(const LayeredGridSpec &scene)
4699 {
4700 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4701
4702 // vertex buffer
4703 {
4704 std::vector<tcu::Vec4> vertexData;
4705
4706 generateLayeredGridVertexAttribData4C4V(vertexData, scene);
4707
4708 if (m_attributeBufferID == 0)
4709 gl.genBuffers(1, &m_attributeBufferID);
4710 gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
4711 gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4712 }
4713
4714 // index buffer
4715 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4716 {
4717 std::vector<uint32_t> indexData;
4718
4719 generateLayeredGridIndexData(indexData, scene);
4720
4721 if (m_indexBufferID == 0)
4722 gl.genBuffers(1, &m_indexBufferID);
4723 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
4724 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
4725 GL_STATIC_DRAW);
4726 }
4727
4728 GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
4729 }
4730
4731 template <typename SampleType>
logAndSetTestResult(const std::vector<SampleResult> & results)4732 void RenderCase<SampleType>::logAndSetTestResult(const std::vector<SampleResult> &results)
4733 {
4734 std::vector<RenderSampleResult<SampleType>> mappedResults;
4735
4736 mapResultsToRenderRateFormat(mappedResults, results);
4737
4738 {
4739 const RenderSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), mappedResults);
4740 const float rate = analysis.renderRateAtRange;
4741
4742 if (rate == std::numeric_limits<float>::infinity())
4743 {
4744 // sample times are 1) invalid or 2) timer resolution too low
4745 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
4746 }
4747 else
4748 {
4749 // report transfer rate in millions of MiB/s
4750 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
4751 }
4752 }
4753 }
4754
4755 template <typename SampleType>
mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> & dst,const std::vector<SampleResult> & src) const4756 void RenderCase<SampleType>::mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> &dst,
4757 const std::vector<SampleResult> &src) const
4758 {
4759 dst.resize(src.size());
4760
4761 for (int ndx = 0; ndx < (int)src.size(); ++ndx)
4762 dst[ndx] = src[ndx].result;
4763 }
4764
4765 class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration>
4766 {
4767 public:
4768 ReferenceRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod drawMethod);
4769
4770 private:
4771 void init(void);
4772 void runSample(SampleResult &sample);
4773 };
4774
ReferenceRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod drawMethod)4775 ReferenceRenderTimeCase::ReferenceRenderTimeCase(Context &context, const char *name, const char *description,
4776 DrawMethod drawMethod)
4777 : RenderCase<RenderReadDuration>(context, name, description, drawMethod)
4778 {
4779 }
4780
init(void)4781 void ReferenceRenderTimeCase::init(void)
4782 {
4783 const char *const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4784
4785 // init parent
4786 RenderCase<RenderReadDuration>::init();
4787
4788 // log
4789 m_testCtx.getLog() << tcu::TestLog::Message << "Measuring the time used in " << targetFunctionName
4790 << " and readPixels call with different rendering workloads.\n"
4791 << getNumSamples() << " test samples. Sample order is randomized.\n"
4792 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4793 << "Generated workload is multiple viewport-covering grids with varying number of cells, each "
4794 "cell is two separate triangles.\n"
4795 << "Workload sizes are in the range [" << getMinWorkloadSize() << ", " << getMaxWorkloadSize()
4796 << "] vertices ([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4797 << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4798 << "Test result is the approximated total processing rate in MiB / s.\n"
4799 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
4800 ("Note that index array size is not included in the processed size.\n") :
4801 (""))
4802 << "Note! Test result should only be used as a baseline reference result for "
4803 "buffer.data_upload.* test group results."
4804 << tcu::TestLog::EndMessage;
4805 }
4806
runSample(SampleResult & sample)4807 void ReferenceRenderTimeCase::runSample(SampleResult &sample)
4808 {
4809 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4810 tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4811 const int numVertices = getLayeredGridNumVertices(sample.scene);
4812 const glu::Buffer arrayBuffer(m_context.getRenderContext());
4813 const glu::Buffer indexBuffer(m_context.getRenderContext());
4814 std::vector<tcu::Vec4> vertexData;
4815 std::vector<uint32_t> indexData;
4816 uint64_t startTime;
4817 uint64_t endTime;
4818
4819 // generate and upload buffers
4820
4821 generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4822 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4823 gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4824
4825 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4826 {
4827 generateLayeredGridIndexData(indexData, sample.scene);
4828 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4829 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
4830 GL_STATIC_DRAW);
4831 }
4832
4833 setupVertexAttribs();
4834
4835 // make sure data is uploaded
4836
4837 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4838 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4839 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4840 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4841 else
4842 DE_ASSERT(false);
4843 waitGLResults();
4844
4845 gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4846 gl.clear(GL_COLOR_BUFFER_BIT);
4847 waitGLResults();
4848
4849 tcu::warmupCPU();
4850
4851 // Measure both draw and associated readpixels
4852 {
4853 startTime = deGetMicroseconds();
4854
4855 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4856 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4857 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4858 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4859 else
4860 DE_ASSERT(false);
4861
4862 endTime = deGetMicroseconds();
4863
4864 sample.result.duration.renderDuration = endTime - startTime;
4865 }
4866
4867 {
4868 startTime = deGetMicroseconds();
4869 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4870 endTime = deGetMicroseconds();
4871
4872 sample.result.duration.readDuration = endTime - startTime;
4873 }
4874
4875 sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4876 sample.result.uploadedDataSize = 0;
4877 sample.result.unrelatedDataSize = 0;
4878 sample.result.duration.renderReadDuration =
4879 sample.result.duration.renderDuration + sample.result.duration.readDuration;
4880 sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4881 sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4882 }
4883
4884 class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration>
4885 {
4886 public:
4887 UnrelatedUploadRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod drawMethod,
4888 UploadMethod unrelatedUploadMethod);
4889
4890 private:
4891 void init(void);
4892 void runSample(SampleResult &sample);
4893
4894 const UploadMethod m_unrelatedUploadMethod;
4895 };
4896
UnrelatedUploadRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod drawMethod,UploadMethod unrelatedUploadMethod)4897 UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase(Context &context, const char *name,
4898 const char *description, DrawMethod drawMethod,
4899 UploadMethod unrelatedUploadMethod)
4900 : RenderCase<UnrelatedUploadRenderReadDuration>(context, name, description, drawMethod)
4901 , m_unrelatedUploadMethod(unrelatedUploadMethod)
4902 {
4903 DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST);
4904 }
4905
init(void)4906 void UnrelatedUploadRenderTimeCase::init(void)
4907 {
4908 const char *const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4909 tcu::MessageBuilder message(&m_testCtx.getLog());
4910
4911 // init parent
4912 RenderCase<UnrelatedUploadRenderReadDuration>::init();
4913
4914 // log
4915
4916 message << "Measuring the time used in " << targetFunctionName
4917 << " and readPixels call with different rendering workloads.\n"
4918 << "Uploading an unrelated buffer just before issuing the rendering command with "
4919 << ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") :
4920 (m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") :
4921 (m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("mapBufferRange") :
4922 ((const char *)DE_NULL))
4923 << ".\n"
4924 << getNumSamples() << " test samples. Sample order is randomized.\n"
4925 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4926 << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two "
4927 "separate triangles.\n"
4928 << "Workload sizes are in the range [" << getMinWorkloadSize() << ", " << getMaxWorkloadSize()
4929 << "] vertices ([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4930 << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4931 << "Unrelated upload sizes are in the range [" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ", "
4932 << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n"
4933 << "Test result is the approximated total processing rate in MiB / s.\n"
4934 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
4935 ("Note that index array size is not included in the processed size.\n") :
4936 (""))
4937 << "Note that the data size and the time used in the unrelated upload is not included in the results.\n"
4938 << "Note! Test result may not be useful as is but instead should be compared against the reference.* group "
4939 "and upload_and_draw.*_and_unrelated_upload group results.\n"
4940 << tcu::TestLog::EndMessage;
4941 }
4942
runSample(SampleResult & sample)4943 void UnrelatedUploadRenderTimeCase::runSample(SampleResult &sample)
4944 {
4945 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4946 tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4947 const int numVertices = getLayeredGridNumVertices(sample.scene);
4948 const glu::Buffer arrayBuffer(m_context.getRenderContext());
4949 const glu::Buffer indexBuffer(m_context.getRenderContext());
4950 const glu::Buffer unrelatedBuffer(m_context.getRenderContext());
4951 int unrelatedUploadSize = -1;
4952 int renderUploadSize;
4953 std::vector<tcu::Vec4> vertexData;
4954 std::vector<uint32_t> indexData;
4955 uint64_t startTime;
4956 uint64_t endTime;
4957
4958 // generate and upload buffers
4959
4960 generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4961 renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4962
4963 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4964 gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);
4965
4966 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4967 {
4968 generateLayeredGridIndexData(indexData, sample.scene);
4969 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4970 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
4971 GL_STATIC_DRAW);
4972 }
4973
4974 setupVertexAttribs();
4975
4976 // make sure data is uploaded
4977
4978 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4979 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4980 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4981 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4982 else
4983 DE_ASSERT(false);
4984 waitGLResults();
4985
4986 gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4987 gl.clear(GL_COLOR_BUFFER_BIT);
4988 waitGLResults();
4989
4990 tcu::warmupCPU();
4991
4992 // Unrelated upload
4993 if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
4994 {
4995 unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4996
4997 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4998 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
4999 }
5000 else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5001 {
5002 unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
5003
5004 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
5005 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
5006 gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
5007 }
5008 else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5009 {
5010 void *mapPtr;
5011 glw::GLboolean unmapSuccessful;
5012
5013 unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
5014
5015 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
5016 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
5017
5018 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize,
5019 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT |
5020 GL_MAP_UNSYNCHRONIZED_BIT);
5021 if (!mapPtr)
5022 throw tcu::Exception("MapBufferRange returned NULL");
5023
5024 deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);
5025
5026 // if unmapping fails, just try again later
5027 unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
5028 if (!unmapSuccessful)
5029 throw UnmapFailureError();
5030 }
5031 else
5032 DE_ASSERT(false);
5033
5034 DE_ASSERT(unrelatedUploadSize != -1);
5035
5036 // Measure both draw and associated readpixels
5037 {
5038 startTime = deGetMicroseconds();
5039
5040 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5041 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5042 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5043 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5044 else
5045 DE_ASSERT(false);
5046
5047 endTime = deGetMicroseconds();
5048
5049 sample.result.duration.renderDuration = endTime - startTime;
5050 }
5051
5052 {
5053 startTime = deGetMicroseconds();
5054 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5055 endTime = deGetMicroseconds();
5056
5057 sample.result.duration.readDuration = endTime - startTime;
5058 }
5059
5060 sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
5061 sample.result.uploadedDataSize = renderUploadSize;
5062 sample.result.unrelatedDataSize = unrelatedUploadSize;
5063 sample.result.duration.renderReadDuration =
5064 sample.result.duration.renderDuration + sample.result.duration.readDuration;
5065 sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
5066 sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5067 }
5068
5069 class ReferenceReadPixelsTimeCase : public TestCase
5070 {
5071 public:
5072 ReferenceReadPixelsTimeCase(Context &context, const char *name, const char *description);
5073
5074 private:
5075 void init(void);
5076 IterateResult iterate(void);
5077 void logAndSetTestResult(void);
5078
5079 enum
5080 {
5081 RENDER_AREA_SIZE = 128
5082 };
5083
5084 const int m_numSamples;
5085 int m_sampleNdx;
5086 std::vector<int> m_samples;
5087 };
5088
ReferenceReadPixelsTimeCase(Context & context,const char * name,const char * description)5089 ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase(Context &context, const char *name, const char *description)
5090 : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
5091 , m_numSamples(20)
5092 , m_sampleNdx(0)
5093 , m_samples(m_numSamples)
5094 {
5095 }
5096
init(void)5097 void ReferenceReadPixelsTimeCase::init(void)
5098 {
5099 m_testCtx.getLog() << tcu::TestLog::Message << "Measuring the time used in a single readPixels call with "
5100 << m_numSamples << " test samples.\n"
5101 << "Test result is the median of the samples in microseconds.\n"
5102 << "Note! Test result should only be used as a baseline reference result for "
5103 "buffer.data_upload.* test group results."
5104 << tcu::TestLog::EndMessage;
5105 }
5106
iterate(void)5107 ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate(void)
5108 {
5109 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
5110 tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5111 uint64_t startTime;
5112 uint64_t endTime;
5113
5114 deYield();
5115 tcu::warmupCPU();
5116 deYield();
5117
5118 // "Render" something and wait for it
5119 gl.clearColor(0.0f, 1.0f, float(m_sampleNdx) / float(m_numSamples), 1.0f);
5120 gl.clear(GL_COLOR_BUFFER_BIT);
5121
5122 // wait for results
5123 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5124
5125 // measure time used in readPixels
5126 startTime = deGetMicroseconds();
5127 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5128 endTime = deGetMicroseconds();
5129
5130 m_samples[m_sampleNdx] = (int)(endTime - startTime);
5131
5132 if (++m_sampleNdx < m_numSamples)
5133 return CONTINUE;
5134
5135 logAndSetTestResult();
5136 return STOP;
5137 }
5138
logAndSetTestResult(void)5139 void ReferenceReadPixelsTimeCase::logAndSetTestResult(void)
5140 {
5141 // Log sample list
5142 {
5143 m_testCtx.getLog() << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
5144 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
5145 << tcu::TestLog::EndSampleInfo;
5146
5147 for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
5148 m_testCtx.getLog() << tcu::TestLog::Sample << m_samples[sampleNdx] << tcu::TestLog::EndSample;
5149
5150 m_testCtx.getLog() << tcu::TestLog::EndSampleList;
5151 }
5152
5153 // Log median
5154 {
5155 float median;
5156 float limit60Low;
5157 float limit60Up;
5158
5159 std::sort(m_samples.begin(), m_samples.end());
5160 median = linearSample(m_samples, 0.5f);
5161 limit60Low = linearSample(m_samples, 0.2f);
5162 limit60Up = linearSample(m_samples, 0.8f);
5163
5164 m_testCtx.getLog() << tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median)
5165 << tcu::TestLog::Message << "60 % of samples within range:\n"
5166 << tcu::TestLog::EndMessage
5167 << tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
5168 << tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);
5169
5170 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
5171 }
5172 }
5173
5174 template <typename SampleType>
5175 class GenericUploadRenderTimeCase : public RenderCase<SampleType>
5176 {
5177 public:
5178 typedef typename RenderCase<SampleType>::SampleResult SampleResult;
5179
5180 GenericUploadRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod method,
5181 TargetBuffer targetBuffer, UploadMethod uploadMethod, BufferState bufferState,
5182 UploadRange uploadRange, UnrelatedBufferType unrelatedBufferType);
5183
5184 private:
5185 void init(void);
5186 void runSample(SampleResult &sample);
5187
5188 using RenderCase<SampleType>::RENDER_AREA_SIZE;
5189
5190 const TargetBuffer m_targetBuffer;
5191 const BufferState m_bufferState;
5192 const UploadMethod m_uploadMethod;
5193 const UnrelatedBufferType m_unrelatedBufferType;
5194 const UploadRange m_uploadRange;
5195
5196 using RenderCase<SampleType>::m_context;
5197 using RenderCase<SampleType>::m_testCtx;
5198 using RenderCase<SampleType>::m_drawMethod;
5199 };
5200
5201 template <typename SampleType>
GenericUploadRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod method,TargetBuffer targetBuffer,UploadMethod uploadMethod,BufferState bufferState,UploadRange uploadRange,UnrelatedBufferType unrelatedBufferType)5202 GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase(Context &context, const char *name,
5203 const char *description, DrawMethod method,
5204 TargetBuffer targetBuffer,
5205 UploadMethod uploadMethod, BufferState bufferState,
5206 UploadRange uploadRange,
5207 UnrelatedBufferType unrelatedBufferType)
5208 : RenderCase<SampleType>(context, name, description, method)
5209 , m_targetBuffer(targetBuffer)
5210 , m_bufferState(bufferState)
5211 , m_uploadMethod(uploadMethod)
5212 , m_unrelatedBufferType(unrelatedBufferType)
5213 , m_uploadRange(uploadRange)
5214 {
5215 DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
5216 DE_ASSERT(m_bufferState < BUFFERSTATE_LAST);
5217 DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
5218 DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
5219 DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
5220 }
5221
5222 template <typename SampleType>
init(void)5223 void GenericUploadRenderTimeCase<SampleType>::init(void)
5224 {
5225 // init parent
5226 RenderCase<SampleType>::init();
5227
5228 // log
5229 {
5230 const char *const targetFunctionName =
5231 (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
5232 const int perVertexSize =
5233 (m_targetBuffer == TARGETBUFFER_INDEX) ? ((int)sizeof(uint32_t)) : ((int)sizeof(tcu::Vec4[2]));
5234 const int fullMinUploadSize = RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize;
5235 const int fullMaxUploadSize = RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize;
5236 const int minUploadSize =
5237 (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize / 2, 4));
5238 const int maxUploadSize =
5239 (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize / 2, 4));
5240 const int minUnrelatedUploadSize = RenderCase<SampleType>::getMinWorkloadSize() * (int)sizeof(tcu::Vec4[2]);
5241 const int maxUnrelatedUploadSize = RenderCase<SampleType>::getMaxWorkloadSize() * (int)sizeof(tcu::Vec4[2]);
5242
5243 m_testCtx.getLog()
5244 << tcu::TestLog::Message << "Measuring the time used in " << targetFunctionName
5245 << " and readPixels call with different rendering workloads.\n"
5246 << "The " << ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib")) << " buffer "
5247 << ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents ")) << "sourced by the rendering command "
5248 << ((m_bufferState == BUFFERSTATE_NEW) ? ("is uploaded ") :
5249 (m_uploadRange == UPLOADRANGE_FULL) ? ("are specified ") :
5250 (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("are updated (partial upload) ") :
5251 ((const char *)DE_NULL))
5252 << "just before issuing the rendering command.\n"
5253 << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") :
5254 ("The buffer is generated just before uploading.\n"))
5255 << "Buffer "
5256 << ((m_bufferState == BUFFERSTATE_NEW) ? ("is uploaded") :
5257 (m_uploadRange == UPLOADRANGE_FULL) ? ("contents are specified") :
5258 (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("contents are partially updated") :
5259 ((const char *)DE_NULL))
5260 << " with "
5261 << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") :
5262 (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") :
5263 ("mapBufferRange"))
5264 << " command. Usage of the target buffer is DYNAMIC_DRAW.\n"
5265 << ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ?
5266 ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | "
5267 "MAP_UNSYNCHRONIZED_BIT\n") :
5268 (""))
5269 << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ?
5270 ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") :
5271 (""))
5272 << RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n"
5273 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
5274 << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two "
5275 "separate triangles.\n"
5276 << "Workload sizes are in the range [" << RenderCase<SampleType>::getMinWorkloadSize() << ", "
5277 << RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices "
5278 << "([" << getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << ","
5279 << getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n"
5280 << "Upload sizes are in the range [" << getHumanReadableByteSize(minUploadSize) << ","
5281 << getHumanReadableByteSize(maxUploadSize) << "].\n"
5282 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
5283 ("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) +
5284 ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") :
5285 (""))
5286 << "Test result is the approximated processing rate in MiB / s.\n"
5287 << "Note that while upload time is measured, the time used is not included in the results.\n"
5288 << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ?
5289 ("Note that the data size and the time used in the unrelated upload is not included in the "
5290 "results.\n") :
5291 (""))
5292 << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
5293 ("Note that index array size is not included in the processed size.\n") :
5294 (""))
5295 << "Note! Test result may not be useful as is but instead should be compared against the reference.* group "
5296 "and other upload_and_draw.* group results.\n"
5297 << tcu::TestLog::EndMessage;
5298 }
5299 }
5300
5301 template <typename SampleType>
runSample(SampleResult & sample)5302 void GenericUploadRenderTimeCase<SampleType>::runSample(SampleResult &sample)
5303 {
5304 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
5305 const glu::Buffer arrayBuffer(m_context.getRenderContext());
5306 const glu::Buffer indexBuffer(m_context.getRenderContext());
5307 const glu::Buffer unrelatedBuffer(m_context.getRenderContext());
5308 const int numVertices = getLayeredGridNumVertices(sample.scene);
5309 tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5310 uint64_t startTime;
5311 uint64_t endTime;
5312 std::vector<tcu::Vec4> vertexData;
5313 std::vector<uint32_t> indexData;
5314
5315 // create data
5316
5317 generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
5318 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5319 generateLayeredGridIndexData(indexData, sample.scene);
5320
5321 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5322 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5323 RenderCase<SampleType>::setupVertexAttribs();
5324
5325 // target should be an exisiting buffer? Draw from it once to make sure it exists on the gpu
5326
5327 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING)
5328 {
5329 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5330 GL_DYNAMIC_DRAW);
5331 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5332 }
5333 else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW)
5334 {
5335 // do not touch the vertex buffer
5336 }
5337 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING)
5338 {
5339 // hint that the target buffer will be modified soon
5340 const glw::GLenum vertexDataUsage =
5341 (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
5342 const glw::GLenum indexDataUsage =
5343 (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
5344
5345 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5346 vertexDataUsage);
5347 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0],
5348 indexDataUsage);
5349 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5350 }
5351 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW)
5352 {
5353 if (m_targetBuffer == TARGETBUFFER_VERTEX)
5354 {
5355 // make the index buffer present on the gpu
5356 // use another vertex buffer to keep original buffer in unused state
5357 const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
5358
5359 gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
5360 RenderCase<SampleType>::setupVertexAttribs();
5361
5362 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5363 GL_STATIC_DRAW);
5364 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)),
5365 &indexData[0], GL_STATIC_DRAW);
5366 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5367
5368 // restore original state
5369 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5370 RenderCase<SampleType>::setupVertexAttribs();
5371 }
5372 else if (m_targetBuffer == TARGETBUFFER_INDEX)
5373 {
5374 // make the vertex buffer present on the gpu
5375 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5376 GL_STATIC_DRAW);
5377 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5378 }
5379 else
5380 DE_ASSERT(false);
5381 }
5382 else
5383 DE_ASSERT(false);
5384
5385 RenderCase<SampleType>::waitGLResults();
5386 GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
5387
5388 gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
5389 gl.clear(GL_COLOR_BUFFER_BIT);
5390 RenderCase<SampleType>::waitGLResults();
5391
5392 tcu::warmupCPU();
5393
5394 // upload
5395
5396 {
5397 glw::GLenum target;
5398 glw::GLsizeiptr size;
5399 glw::GLintptr offset = 0;
5400 const void *source;
5401
5402 if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
5403 {
5404 target = GL_ARRAY_BUFFER;
5405 size = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
5406 source = &vertexData[0];
5407 }
5408 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
5409 {
5410 target = GL_ELEMENT_ARRAY_BUFFER;
5411 size = (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t));
5412 source = &indexData[0];
5413 }
5414 else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5415 {
5416 DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
5417
5418 target = GL_ARRAY_BUFFER;
5419 size = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
5420 offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
5421 source = (const uint8_t *)&vertexData[0] + offset;
5422 }
5423 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5424 {
5425 DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
5426
5427 // upload to 25% - 75% range
5428 target = GL_ELEMENT_ARRAY_BUFFER;
5429 size = (glw::GLsizeiptr)deAlign32((int32_t)(indexData.size() * sizeof(uint32_t)) / 2, 4);
5430 offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
5431 source = (const uint8_t *)&indexData[0] + offset;
5432 }
5433 else
5434 {
5435 DE_ASSERT(false);
5436 return;
5437 }
5438
5439 startTime = deGetMicroseconds();
5440
5441 if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5442 gl.bufferData(target, size, source, GL_DYNAMIC_DRAW);
5443 else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5444 {
5445 // create buffer storage
5446 if (m_bufferState == BUFFERSTATE_NEW)
5447 gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
5448 gl.bufferSubData(target, offset, size, source);
5449 }
5450 else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5451 {
5452 void *mapPtr;
5453 glw::GLboolean unmapSuccessful;
5454
5455 // create buffer storage
5456 if (m_bufferState == BUFFERSTATE_NEW)
5457 gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
5458
5459 mapPtr = gl.mapBufferRange(target, offset, size,
5460 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT |
5461 GL_MAP_UNSYNCHRONIZED_BIT);
5462 if (!mapPtr)
5463 throw tcu::Exception("MapBufferRange returned NULL");
5464
5465 deMemcpy(mapPtr, source, (int)size);
5466
5467 // if unmapping fails, just try again later
5468 unmapSuccessful = gl.unmapBuffer(target);
5469 if (!unmapSuccessful)
5470 throw UnmapFailureError();
5471 }
5472 else
5473 DE_ASSERT(false);
5474
5475 endTime = deGetMicroseconds();
5476
5477 sample.result.uploadedDataSize = (int)size;
5478 sample.result.duration.uploadDuration = endTime - startTime;
5479 }
5480
5481 // unrelated
5482 if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX)
5483 {
5484 const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
5485
5486 gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
5487 gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
5488 // Attibute pointers are not modified, no need restore state
5489
5490 sample.result.unrelatedDataSize = unrelatedUploadSize;
5491 }
5492
5493 // draw
5494 {
5495 startTime = deGetMicroseconds();
5496
5497 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5498 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5499 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5500 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5501 else
5502 DE_ASSERT(false);
5503
5504 endTime = deGetMicroseconds();
5505
5506 sample.result.duration.renderDuration = endTime - startTime;
5507 }
5508
5509 // read
5510 {
5511 startTime = deGetMicroseconds();
5512 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5513 endTime = deGetMicroseconds();
5514
5515 sample.result.duration.readDuration = endTime - startTime;
5516 }
5517
5518 // set results
5519
5520 sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices;
5521
5522 sample.result.duration.renderReadDuration =
5523 sample.result.duration.renderDuration + sample.result.duration.readDuration;
5524 sample.result.duration.totalDuration = sample.result.duration.uploadDuration +
5525 sample.result.duration.renderDuration + sample.result.duration.readDuration;
5526 sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5527 }
5528
5529 class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration>
5530 {
5531 public:
5532 enum MapFlags
5533 {
5534 MAPFLAG_NONE = 0,
5535 MAPFLAG_INVALIDATE_BUFFER,
5536 MAPFLAG_INVALIDATE_RANGE,
5537
5538 MAPFLAG_LAST
5539 };
5540 enum UploadBufferTarget
5541 {
5542 UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0,
5543 UPLOADBUFFERTARGET_SAME_BUFFER,
5544
5545 UPLOADBUFFERTARGET_LAST
5546 };
5547 BufferInUseRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod method,
5548 MapFlags mapFlags, TargetBuffer targetBuffer, UploadMethod uploadMethod,
5549 UploadRange uploadRange, UploadBufferTarget uploadTarget);
5550
5551 private:
5552 void init(void);
5553 void runSample(SampleResult &sample);
5554
5555 const TargetBuffer m_targetBuffer;
5556 const UploadMethod m_uploadMethod;
5557 const UploadRange m_uploadRange;
5558 const MapFlags m_mapFlags;
5559 const UploadBufferTarget m_uploadBufferTarget;
5560 };
5561
BufferInUseRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod method,MapFlags mapFlags,TargetBuffer targetBuffer,UploadMethod uploadMethod,UploadRange uploadRange,UploadBufferTarget uploadTarget)5562 BufferInUseRenderTimeCase::BufferInUseRenderTimeCase(Context &context, const char *name, const char *description,
5563 DrawMethod method, MapFlags mapFlags, TargetBuffer targetBuffer,
5564 UploadMethod uploadMethod, UploadRange uploadRange,
5565 UploadBufferTarget uploadTarget)
5566 : RenderCase<RenderUploadRenderReadDuration>(context, name, description, method)
5567 , m_targetBuffer(targetBuffer)
5568 , m_uploadMethod(uploadMethod)
5569 , m_uploadRange(uploadRange)
5570 , m_mapFlags(mapFlags)
5571 , m_uploadBufferTarget(uploadTarget)
5572 {
5573 DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
5574 DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
5575 DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
5576 DE_ASSERT(m_mapFlags < MAPFLAG_LAST);
5577 DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST);
5578 }
5579
init(void)5580 void BufferInUseRenderTimeCase::init(void)
5581 {
5582 RenderCase<RenderUploadRenderReadDuration>::init();
5583
5584 // log
5585 {
5586 const char *const targetFunctionName =
5587 (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
5588 const char *const uploadFunctionName = (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") :
5589 (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") :
5590 ("mapBufferRange");
5591 const bool isReferenceCase = (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER);
5592 tcu::MessageBuilder message(&m_testCtx.getLog());
5593
5594 message << "Measuring the time used in " << targetFunctionName << " call, a buffer upload, "
5595 << targetFunctionName
5596 << " call using the uploaded buffer and readPixels call with different upload sizes.\n";
5597
5598 if (isReferenceCase)
5599 message << "Rendering:\n"
5600 << " before test: create and use buffers B and C\n"
5601 << " first draw: render using buffer B\n"
5602 << ((m_uploadRange == UPLOADRANGE_FULL) ? (" upload: respecify buffer C contents\n") :
5603 (m_uploadRange == UPLOADRANGE_PARTIAL) ? (" upload: modify buffer C contents\n") :
5604 ((const char *)DE_NULL))
5605 << " second draw: render using buffer C\n"
5606 << " read: readPixels\n";
5607 else
5608 message << "Rendering:\n"
5609 << " before test: create and use buffer B\n"
5610 << " first draw: render using buffer B\n"
5611 << ((m_uploadRange == UPLOADRANGE_FULL) ? (" upload: respecify buffer B contents\n") :
5612 (m_uploadRange == UPLOADRANGE_PARTIAL) ? (" upload: modify buffer B contents\n") :
5613 ((const char *)DE_NULL))
5614 << " second draw: render using buffer B\n"
5615 << " read: readPixels\n";
5616
5617 message << "Uploading using " << uploadFunctionName
5618 << ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE) ? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT") :
5619 (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ?
5620 (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT") :
5621 (m_mapFlags == MAPFLAG_NONE) ? ("") :
5622 ((const char *)DE_NULL))
5623 << "\n"
5624 << getNumSamples() << " test samples. Sample order is randomized.\n"
5625 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
5626 << "Workload sizes are in the range [" << getMinWorkloadSize() << ", " << getMaxWorkloadSize()
5627 << "] vertices "
5628 << "([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
5629 << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
5630 << "Test result is the approximated processing rate in MiB / s of the second draw call and the "
5631 "readPixels call.\n";
5632
5633 if (isReferenceCase)
5634 message << "Note! Test result should only be used as a baseline reference result for "
5635 "buffer.render_after_upload.draw_modify_draw test group results.";
5636 else
5637 message << "Note! Test result may not be useful as is but instead should be compared against the "
5638 "buffer.render_after_upload.reference.draw_upload_draw group results.\n";
5639
5640 message << tcu::TestLog::EndMessage;
5641 }
5642 }
5643
runSample(SampleResult & sample)5644 void BufferInUseRenderTimeCase::runSample(SampleResult &sample)
5645 {
5646 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
5647 const glu::Buffer arrayBuffer(m_context.getRenderContext());
5648 const glu::Buffer indexBuffer(m_context.getRenderContext());
5649 const glu::Buffer alternativeUploadBuffer(m_context.getRenderContext());
5650 const int numVertices = getLayeredGridNumVertices(sample.scene);
5651 tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5652 uint64_t startTime;
5653 uint64_t endTime;
5654 std::vector<tcu::Vec4> vertexData;
5655 std::vector<uint32_t> indexData;
5656
5657 // create data
5658
5659 generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
5660 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5661 generateLayeredGridIndexData(indexData, sample.scene);
5662
5663 // make buffers used
5664
5665 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5666 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5667 setupVertexAttribs();
5668
5669 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5670 {
5671 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5672 GL_STREAM_DRAW);
5673 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5674 }
5675 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5676 {
5677 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5678 GL_STREAM_DRAW);
5679 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0],
5680 GL_STREAM_DRAW);
5681 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5682 }
5683 else
5684 DE_ASSERT(false);
5685
5686 // another pair of buffers for reference case
5687 if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5688 {
5689 if (m_targetBuffer == TARGETBUFFER_VERTEX)
5690 {
5691 gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer);
5692 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5693 GL_STREAM_DRAW);
5694
5695 setupVertexAttribs();
5696 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5697 }
5698 else if (m_targetBuffer == TARGETBUFFER_INDEX)
5699 {
5700 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer);
5701 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)),
5702 &indexData[0], GL_STREAM_DRAW);
5703 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5704 }
5705 else
5706 DE_ASSERT(false);
5707
5708 // restore state
5709 gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5710 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5711 setupVertexAttribs();
5712 }
5713
5714 waitGLResults();
5715 GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
5716
5717 gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
5718 gl.clear(GL_COLOR_BUFFER_BIT);
5719 waitGLResults();
5720
5721 tcu::warmupCPU();
5722
5723 // first draw
5724 {
5725 startTime = deGetMicroseconds();
5726
5727 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5728 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5729 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5730 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5731 else
5732 DE_ASSERT(false);
5733
5734 endTime = deGetMicroseconds();
5735
5736 sample.result.duration.firstRenderDuration = endTime - startTime;
5737 }
5738
5739 // upload
5740 {
5741 glw::GLenum target;
5742 glw::GLsizeiptr size;
5743 glw::GLintptr offset = 0;
5744 const void *source;
5745
5746 if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
5747 {
5748 target = GL_ARRAY_BUFFER;
5749 size = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
5750 source = &vertexData[0];
5751 }
5752 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
5753 {
5754 target = GL_ELEMENT_ARRAY_BUFFER;
5755 size = (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t));
5756 source = &indexData[0];
5757 }
5758 else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5759 {
5760 target = GL_ARRAY_BUFFER;
5761 size = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
5762 offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
5763 source = (const uint8_t *)&vertexData[0] + offset;
5764 }
5765 else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5766 {
5767 // upload to 25% - 75% range
5768 target = GL_ELEMENT_ARRAY_BUFFER;
5769 size = (glw::GLsizeiptr)deAlign32((int32_t)(indexData.size() * sizeof(uint32_t)) / 2, 4);
5770 offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
5771 source = (const uint8_t *)&indexData[0] + offset;
5772 }
5773 else
5774 {
5775 DE_ASSERT(false);
5776 return;
5777 }
5778
5779 // reference case? don't modify the buffer in use
5780 if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5781 gl.bindBuffer(target, *alternativeUploadBuffer);
5782
5783 startTime = deGetMicroseconds();
5784
5785 if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5786 gl.bufferData(target, size, source, GL_STREAM_DRAW);
5787 else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5788 gl.bufferSubData(target, offset, size, source);
5789 else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5790 {
5791 const int mapFlags =
5792 (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT) :
5793 (m_mapFlags == MAPFLAG_INVALIDATE_RANGE) ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT) :
5794 (-1);
5795 void *mapPtr;
5796 glw::GLboolean unmapSuccessful;
5797
5798 mapPtr = gl.mapBufferRange(target, offset, size, mapFlags);
5799 if (!mapPtr)
5800 throw tcu::Exception("MapBufferRange returned NULL");
5801
5802 deMemcpy(mapPtr, source, (int)size);
5803
5804 // if unmapping fails, just try again later
5805 unmapSuccessful = gl.unmapBuffer(target);
5806 if (!unmapSuccessful)
5807 throw UnmapFailureError();
5808 }
5809 else
5810 DE_ASSERT(false);
5811
5812 endTime = deGetMicroseconds();
5813
5814 sample.result.uploadedDataSize = (int)size;
5815 sample.result.duration.uploadDuration = endTime - startTime;
5816 }
5817
5818 // second draw
5819 {
5820 // Source vertex data from alternative buffer in refernce case
5821 if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX)
5822 setupVertexAttribs();
5823
5824 startTime = deGetMicroseconds();
5825
5826 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5827 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5828 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5829 gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5830 else
5831 DE_ASSERT(false);
5832
5833 endTime = deGetMicroseconds();
5834
5835 sample.result.duration.secondRenderDuration = endTime - startTime;
5836 }
5837
5838 // read
5839 {
5840 startTime = deGetMicroseconds();
5841 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5842 endTime = deGetMicroseconds();
5843
5844 sample.result.duration.readDuration = endTime - startTime;
5845 }
5846
5847 // set results
5848
5849 sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
5850
5851 sample.result.duration.renderReadDuration =
5852 sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
5853 sample.result.duration.totalDuration =
5854 sample.result.duration.firstRenderDuration + sample.result.duration.uploadDuration +
5855 sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
5856 sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5857 }
5858
5859 class UploadWaitDrawCase : public RenderPerformanceTestBase
5860 {
5861 public:
5862 struct Sample
5863 {
5864 int numFrames;
5865 uint64_t uploadCallEndTime;
5866 };
5867 struct Result
5868 {
5869 uint64_t uploadDuration;
5870 uint64_t renderDuration;
5871 uint64_t readDuration;
5872 uint64_t renderReadDuration;
5873
5874 uint64_t timeBeforeUse;
5875 };
5876
5877 UploadWaitDrawCase(Context &context, const char *name, const char *description, DrawMethod drawMethod,
5878 TargetBuffer targetBuffer, UploadMethod uploadMethod, BufferState bufferState);
5879 ~UploadWaitDrawCase(void);
5880
5881 private:
5882 void init(void);
5883 void deinit(void);
5884 IterateResult iterate(void);
5885
5886 void uploadBuffer(Sample &sample, Result &result);
5887 void drawFromBuffer(Sample &sample, Result &result);
5888 void reuseAndDeleteBuffer(void);
5889 void logAndSetTestResult(void);
5890 void logSamples(void);
5891 void drawMisc(void);
5892 int findStabilizationSample(uint64_t Result::*target, const char *description);
5893 bool checkSampleTemporalStability(uint64_t Result::*target, const char *description);
5894
5895 const DrawMethod m_drawMethod;
5896 const TargetBuffer m_targetBuffer;
5897 const UploadMethod m_uploadMethod;
5898 const BufferState m_bufferState;
5899
5900 const int m_numSamplesPerSwap;
5901 const int m_numMaxSwaps;
5902
5903 int m_frameNdx;
5904 int m_sampleNdx;
5905 int m_numVertices;
5906
5907 std::vector<tcu::Vec4> m_vertexData;
5908 std::vector<uint32_t> m_indexData;
5909 std::vector<Sample> m_samples;
5910 std::vector<Result> m_results;
5911 std::vector<int> m_iterationOrder;
5912
5913 uint32_t m_vertexBuffer;
5914 uint32_t m_indexBuffer;
5915 uint32_t m_miscBuffer;
5916 int m_numMiscVertices;
5917 };
5918
UploadWaitDrawCase(Context & context,const char * name,const char * description,DrawMethod drawMethod,TargetBuffer targetBuffer,UploadMethod uploadMethod,BufferState bufferState)5919 UploadWaitDrawCase::UploadWaitDrawCase(Context &context, const char *name, const char *description,
5920 DrawMethod drawMethod, TargetBuffer targetBuffer, UploadMethod uploadMethod,
5921 BufferState bufferState)
5922 : RenderPerformanceTestBase(context, name, description)
5923 , m_drawMethod(drawMethod)
5924 , m_targetBuffer(targetBuffer)
5925 , m_uploadMethod(uploadMethod)
5926 , m_bufferState(bufferState)
5927 , m_numSamplesPerSwap(10)
5928 , m_numMaxSwaps(4)
5929 , m_frameNdx(0)
5930 , m_sampleNdx(0)
5931 , m_numVertices(-1)
5932 , m_vertexBuffer(0)
5933 , m_indexBuffer(0)
5934 , m_miscBuffer(0)
5935 , m_numMiscVertices(-1)
5936 {
5937 }
5938
~UploadWaitDrawCase(void)5939 UploadWaitDrawCase::~UploadWaitDrawCase(void)
5940 {
5941 deinit();
5942 }
5943
init(void)5944 void UploadWaitDrawCase::init(void)
5945 {
5946 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
5947 const int vertexAttribSize = (int)sizeof(tcu::Vec4) * 2; // color4, position4
5948 const int vertexIndexSize = (int)sizeof(uint32_t);
5949 const int vertexUploadDataSize = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize);
5950
5951 RenderPerformanceTestBase::init();
5952
5953 // requirements
5954
5955 if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
5956 m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
5957 throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" +
5958 de::toString<int>(RENDER_AREA_SIZE) + " render target");
5959
5960 // gl state
5961
5962 gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5963
5964 // enable bleding to prevent grid layers from being discarded
5965
5966 gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
5967 gl.blendEquation(GL_FUNC_ADD);
5968 gl.enable(GL_BLEND);
5969
5970 // scene
5971
5972 {
5973 LayeredGridSpec scene;
5974
5975 // create ~8MB workload with similar characteristics as in the other test
5976 // => makes comparison to other results more straightforward
5977 scene.gridWidth = 93;
5978 scene.gridHeight = 93;
5979 scene.gridLayers = 5;
5980
5981 generateLayeredGridVertexAttribData4C4V(m_vertexData, scene);
5982 generateLayeredGridIndexData(m_indexData, scene);
5983 m_numVertices = getLayeredGridNumVertices(scene);
5984 }
5985
5986 // buffers
5987
5988 if (m_bufferState == BUFFERSTATE_NEW)
5989 {
5990 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5991 {
5992 // reads from two buffers, prepare the static buffer
5993
5994 if (m_targetBuffer == TARGETBUFFER_VERTEX)
5995 {
5996 // index buffer is static, use another vertex buffer to keep original buffer in unused state
5997 const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
5998
5999 gl.genBuffers(1, &m_indexBuffer);
6000 gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
6001 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6002 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)),
6003 &m_vertexData[0], GL_STATIC_DRAW);
6004 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t)),
6005 &m_indexData[0], GL_STATIC_DRAW);
6006
6007 setupVertexAttribs();
6008 gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
6009 }
6010 else if (m_targetBuffer == TARGETBUFFER_INDEX)
6011 {
6012 // vertex buffer is static
6013 gl.genBuffers(1, &m_vertexBuffer);
6014 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6015 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)),
6016 &m_vertexData[0], GL_STATIC_DRAW);
6017
6018 setupVertexAttribs();
6019 gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
6020 }
6021 else
6022 DE_ASSERT(false);
6023 }
6024 }
6025 else if (m_bufferState == BUFFERSTATE_EXISTING)
6026 {
6027 const glw::GLenum vertexUsage = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
6028 const glw::GLenum indexUsage = (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
6029
6030 gl.genBuffers(1, &m_vertexBuffer);
6031 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6032 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0],
6033 vertexUsage);
6034
6035 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6036 {
6037 gl.genBuffers(1, &m_indexBuffer);
6038 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6039 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t)),
6040 &m_indexData[0], indexUsage);
6041 }
6042
6043 setupVertexAttribs();
6044
6045 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
6046 gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
6047 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6048 gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
6049 else
6050 DE_ASSERT(false);
6051 }
6052 else
6053 DE_ASSERT(false);
6054
6055 // misc draw buffer
6056 {
6057 std::vector<tcu::Vec4> vertexData;
6058 LayeredGridSpec scene;
6059
6060 // create ~1.5MB workload with similar characteristics
6061 scene.gridWidth = 40;
6062 scene.gridHeight = 40;
6063 scene.gridLayers = 5;
6064
6065 generateLayeredGridVertexAttribData4C4V(vertexData, scene);
6066
6067 gl.genBuffers(1, &m_miscBuffer);
6068 gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
6069 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0],
6070 GL_STATIC_DRAW);
6071
6072 m_numMiscVertices = getLayeredGridNumVertices(scene);
6073 }
6074
6075 // iterations
6076 {
6077 m_samples.resize((m_numMaxSwaps + 1) * m_numSamplesPerSwap);
6078 m_results.resize((m_numMaxSwaps + 1) * m_numSamplesPerSwap);
6079
6080 for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps)
6081 for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx)
6082 {
6083 const int index = numSwaps * m_numSamplesPerSwap + sampleNdx;
6084
6085 m_samples[index].numFrames = numSwaps;
6086 }
6087
6088 m_iterationOrder.resize(m_samples.size());
6089 generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size());
6090 }
6091
6092 // log
6093 m_testCtx.getLog()
6094 << tcu::TestLog::Message << "Measuring time used in "
6095 << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n"
6096 << "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, "
6097 << m_numMaxSwaps << "].\n"
6098 << "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index"))
6099 << " buffer.\n"
6100 << "Uploading using "
6101 << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ?
6102 ("bufferData") :
6103 (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ?
6104 ("bufferSubData") :
6105 (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ?
6106 ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | "
6107 "GL_MAP_UNSYNCHRONIZED_BIT") :
6108 ((const char *)DE_NULL))
6109 << "\n"
6110 << "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n"
6111 << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : (""))
6112 << "Test result is the number of frames (swaps) required for the render time to stabilize.\n"
6113 << "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n"
6114 << tcu::TestLog::EndMessage;
6115 }
6116
deinit(void)6117 void UploadWaitDrawCase::deinit(void)
6118 {
6119 RenderPerformanceTestBase::deinit();
6120
6121 if (m_vertexBuffer)
6122 {
6123 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer);
6124 m_vertexBuffer = 0;
6125 }
6126 if (m_indexBuffer)
6127 {
6128 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer);
6129 m_indexBuffer = 0;
6130 }
6131 if (m_miscBuffer)
6132 {
6133 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer);
6134 m_miscBuffer = 0;
6135 }
6136 }
6137
iterate(void)6138 UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate(void)
6139 {
6140 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
6141 const int betweenIterationFrameCount = 5; // draw misc between test samples
6142 const int frameNdx = m_frameNdx++;
6143 const int currentSampleNdx = m_iterationOrder[m_sampleNdx];
6144
6145 // Simulate work for about 8ms
6146 busyWait(8000);
6147
6148 // Busywork rendering during unused frames
6149 if (frameNdx != m_samples[currentSampleNdx].numFrames)
6150 {
6151 // draw similar from another buffer
6152 drawMisc();
6153 }
6154
6155 if (frameNdx == 0)
6156 {
6157 // upload and start the clock
6158 uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
6159 }
6160
6161 if (frameNdx ==
6162 m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0
6163 {
6164 // draw using the uploaded buffer
6165 drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
6166
6167 // re-use buffer for something else to make sure test iteration do not affect each other
6168 if (m_bufferState == BUFFERSTATE_NEW)
6169 reuseAndDeleteBuffer();
6170 }
6171 else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationFrameCount)
6172 {
6173 // next sample
6174 ++m_sampleNdx;
6175 m_frameNdx = 0;
6176 }
6177
6178 GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate");
6179
6180 if (m_sampleNdx < (int)m_samples.size())
6181 return CONTINUE;
6182
6183 logAndSetTestResult();
6184 return STOP;
6185 }
6186
uploadBuffer(Sample & sample,Result & result)6187 void UploadWaitDrawCase::uploadBuffer(Sample &sample, Result &result)
6188 {
6189 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
6190 uint64_t startTime;
6191 uint64_t endTime;
6192 glw::GLenum target;
6193 glw::GLsizeiptr size;
6194 const void *source;
6195
6196 // data source
6197
6198 if (m_targetBuffer == TARGETBUFFER_VERTEX)
6199 {
6200 DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
6201
6202 target = GL_ARRAY_BUFFER;
6203 size = (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4));
6204 source = &m_vertexData[0];
6205 }
6206 else if (m_targetBuffer == TARGETBUFFER_INDEX)
6207 {
6208 DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
6209
6210 target = GL_ELEMENT_ARRAY_BUFFER;
6211 size = (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t));
6212 source = &m_indexData[0];
6213 }
6214 else
6215 {
6216 DE_ASSERT(false);
6217 return;
6218 }
6219
6220 // gen buffer
6221
6222 if (m_bufferState == BUFFERSTATE_NEW)
6223 {
6224 if (m_targetBuffer == TARGETBUFFER_VERTEX)
6225 {
6226 gl.genBuffers(1, &m_vertexBuffer);
6227 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6228 }
6229 else if (m_targetBuffer == TARGETBUFFER_INDEX)
6230 {
6231 gl.genBuffers(1, &m_indexBuffer);
6232 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6233 }
6234 else
6235 DE_ASSERT(false);
6236
6237 if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA || m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
6238 {
6239 gl.bufferData(target, size, DE_NULL, GL_STATIC_DRAW);
6240 }
6241 }
6242 else if (m_bufferState == BUFFERSTATE_EXISTING)
6243 {
6244 if (m_targetBuffer == TARGETBUFFER_VERTEX)
6245 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6246 else if (m_targetBuffer == TARGETBUFFER_INDEX)
6247 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6248 else
6249 DE_ASSERT(false);
6250 }
6251 else
6252 DE_ASSERT(false);
6253
6254 // upload
6255
6256 startTime = deGetMicroseconds();
6257
6258 if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
6259 gl.bufferData(target, size, source, GL_STATIC_DRAW);
6260 else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
6261 gl.bufferSubData(target, 0, size, source);
6262 else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
6263 {
6264 void *mapPtr;
6265 glw::GLboolean unmapSuccessful;
6266
6267 mapPtr = gl.mapBufferRange(target, 0, size,
6268 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
6269 if (!mapPtr)
6270 throw tcu::Exception("MapBufferRange returned NULL");
6271
6272 deMemcpy(mapPtr, source, (int)size);
6273
6274 // if unmapping fails, just try again later
6275 unmapSuccessful = gl.unmapBuffer(target);
6276 if (!unmapSuccessful)
6277 throw UnmapFailureError();
6278 }
6279 else
6280 DE_ASSERT(false);
6281
6282 endTime = deGetMicroseconds();
6283
6284 sample.uploadCallEndTime = endTime;
6285 result.uploadDuration = endTime - startTime;
6286 }
6287
drawFromBuffer(Sample & sample,Result & result)6288 void UploadWaitDrawCase::drawFromBuffer(Sample &sample, Result &result)
6289 {
6290 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
6291 tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
6292 uint64_t startTime;
6293 uint64_t endTime;
6294
6295 DE_ASSERT(m_vertexBuffer != 0);
6296 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
6297 DE_ASSERT(m_indexBuffer == 0);
6298 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6299 DE_ASSERT(m_indexBuffer != 0);
6300 else
6301 DE_ASSERT(false);
6302
6303 // draw
6304 {
6305 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6306 if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6307 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6308
6309 setupVertexAttribs();
6310
6311 // microseconds passed since return from upload call
6312 result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime;
6313
6314 startTime = deGetMicroseconds();
6315
6316 if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
6317 gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
6318 else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6319 gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
6320 else
6321 DE_ASSERT(false);
6322
6323 endTime = deGetMicroseconds();
6324
6325 result.renderDuration = endTime - startTime;
6326 }
6327
6328 // read
6329 {
6330 startTime = deGetMicroseconds();
6331 glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
6332 endTime = deGetMicroseconds();
6333
6334 result.readDuration = endTime - startTime;
6335 }
6336
6337 result.renderReadDuration = result.renderDuration + result.readDuration;
6338 }
6339
reuseAndDeleteBuffer(void)6340 void UploadWaitDrawCase::reuseAndDeleteBuffer(void)
6341 {
6342 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
6343
6344 if (m_targetBuffer == TARGETBUFFER_INDEX)
6345 {
6346 // respecify and delete index buffer
6347 static const uint32_t indices[3] = {1, 3, 8};
6348
6349 DE_ASSERT(m_indexBuffer != 0);
6350
6351 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
6352 gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL);
6353 gl.deleteBuffers(1, &m_indexBuffer);
6354 m_indexBuffer = 0;
6355 }
6356 else if (m_targetBuffer == TARGETBUFFER_VERTEX)
6357 {
6358 // respecify and delete vertex buffer
6359 static const tcu::Vec4 coloredTriangle[6] = {
6360 tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f), tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f),
6361 tcu::Vec4(-0.2f, 0.4f, 0.0f, 1.0f), tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(0.8f, -0.1f, 0.0f, 1.0f),
6362 };
6363
6364 DE_ASSERT(m_vertexBuffer != 0);
6365
6366 gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW);
6367 gl.drawArrays(GL_TRIANGLES, 0, 3);
6368 gl.deleteBuffers(1, &m_vertexBuffer);
6369 m_vertexBuffer = 0;
6370 }
6371
6372 waitGLResults();
6373 }
6374
logAndSetTestResult(void)6375 void UploadWaitDrawCase::logAndSetTestResult(void)
6376 {
6377 int uploadStabilization;
6378 int renderReadStabilization;
6379 int renderStabilization;
6380 int readStabilization;
6381 bool temporallyStable;
6382
6383 {
6384 const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples");
6385 logSamples();
6386 }
6387
6388 {
6389 const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability");
6390
6391 // log stabilization points
6392 renderReadStabilization = findStabilizationSample(&Result::renderReadDuration, "Combined draw and read");
6393 uploadStabilization = findStabilizationSample(&Result::uploadDuration, "Upload time");
6394 renderStabilization = findStabilizationSample(&Result::renderDuration, "Draw call time");
6395 readStabilization = findStabilizationSample(&Result::readDuration, "ReadPixels time");
6396
6397 temporallyStable = true;
6398 temporallyStable &= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read");
6399 temporallyStable &= checkSampleTemporalStability(&Result::uploadDuration, "Upload time");
6400 temporallyStable &= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
6401 temporallyStable &= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
6402 }
6403
6404 {
6405 const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");
6406
6407 // Check result sanily
6408 if (uploadStabilization != 0)
6409 m_testCtx.getLog() << tcu::TestLog::Message
6410 << "Warning! Upload times are not stable, test result may not be accurate."
6411 << tcu::TestLog::EndMessage;
6412 if (!temporallyStable)
6413 m_testCtx.getLog() << tcu::TestLog::Message
6414 << "Warning! Time samples do not seem to be temporally stable, sample times seem to "
6415 "drift to one direction during test execution."
6416 << tcu::TestLog::EndMessage;
6417
6418 // render & read
6419 if (renderReadStabilization == -1)
6420 m_testCtx.getLog() << tcu::TestLog::Message
6421 << "Combined time used in draw call and ReadPixels did not stabilize."
6422 << tcu::TestLog::EndMessage;
6423 else
6424 m_testCtx.getLog() << tcu::TestLog::Integer(
6425 "RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time",
6426 "frames", QP_KEY_TAG_TIME, renderReadStabilization);
6427
6428 // draw call
6429 if (renderStabilization == -1)
6430 m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize."
6431 << tcu::TestLog::EndMessage;
6432 else
6433 m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint",
6434 "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME,
6435 renderStabilization);
6436
6437 // readpixels
6438 if (readStabilization == -1)
6439 m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize."
6440 << tcu::TestLog::EndMessage;
6441 else
6442 m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint",
6443 "ReadPixels call time stabilization time", "frames",
6444 QP_KEY_TAG_TIME, readStabilization);
6445
6446 // Report renderReadStabilization
6447 if (renderReadStabilization != -1)
6448 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str());
6449 else
6450 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1
6451 }
6452 }
6453
logSamples(void)6454 void UploadWaitDrawCase::logSamples(void)
6455 {
6456 // Inverse m_iterationOrder
6457
6458 std::vector<int> runOrder(m_iterationOrder.size());
6459 for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
6460 runOrder[m_iterationOrder[ndx]] = ndx;
6461
6462 // Log samples
6463
6464 m_testCtx.getLog() << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
6465 << tcu::TestLog::ValueInfo("NumSwaps", "SwapBuffers before use", "",
6466 QP_SAMPLE_VALUE_TAG_PREDICTOR)
6467 << tcu::TestLog::ValueInfo("Delay", "Time before use", "us", QP_SAMPLE_VALUE_TAG_PREDICTOR)
6468 << tcu::TestLog::ValueInfo("RunOrder", "Sample run order", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
6469 << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us",
6470 QP_SAMPLE_VALUE_TAG_RESPONSE)
6471 << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
6472 << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
6473 << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
6474 << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
6475 << tcu::TestLog::EndSampleInfo;
6476
6477 for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
6478 m_testCtx.getLog() << tcu::TestLog::Sample << m_samples[sampleNdx].numFrames
6479 << (int)m_results[sampleNdx].timeBeforeUse << runOrder[sampleNdx]
6480 << (int)m_results[sampleNdx].renderReadDuration
6481 << (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration)
6482 << (int)m_results[sampleNdx].uploadDuration << (int)m_results[sampleNdx].renderDuration
6483 << (int)m_results[sampleNdx].readDuration << tcu::TestLog::EndSample;
6484
6485 m_testCtx.getLog() << tcu::TestLog::EndSampleList;
6486 }
6487
drawMisc(void)6488 void UploadWaitDrawCase::drawMisc(void)
6489 {
6490 const glw::Functions &gl = m_context.getRenderContext().getFunctions();
6491
6492 gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
6493 setupVertexAttribs();
6494 gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices);
6495 }
6496
//! Outcome of comparing two sample distributions (see distributionCompare()).
struct DistributionCompareResult
{
    //! True when the distributions are accepted as equal.
    bool equal;

    //! Test statistic, expressed in standard deviations (z value).
    float standardDeviations;
};
6502
/*--------------------------------------------------------------------*//*!
 * \brief Sum the ranks of testSamples within allSamples.
 *
 * Ranks are 1-based. A value that compares equivalent (under comparer) to a
 * run of entries in allSamples receives the midpoint of the first and last
 * rank of that run.
 *
 * \param testSamples Values to rank.
 * \param allSamples  Ordered pool that the values are ranked against.
 * \param comparer    Ordering predicate used for the binary searches.
 * \return Sum of the (midpoint) ranks of all test samples.
 *//*--------------------------------------------------------------------*/
template <typename Comparer>
static float sumOfRanks(const std::vector<uint64_t> &testSamples, const std::vector<uint64_t> &allSamples,
                        const Comparer &comparer)
{
    float rankTotal = 0;

    for (const uint64_t value : testSamples)
    {
        // [firstTied, pastTied) is the run of pool entries equivalent to value
        const auto firstTied = std::lower_bound(allSamples.begin(), allSamples.end(), value, comparer);
        const auto pastTied  = std::upper_bound(allSamples.begin(), allSamples.end(), value, comparer);

        // zero-based index -> rank; the past-the-end index equals the rank of the last tied entry
        const int firstRank = (int)(firstTied - allSamples.begin()) + 1;
        const int lastRank  = (int)(pastTied - allSamples.begin());

        rankTotal += (float)(firstRank + lastRank) / 2.0f;
    }

    return rankTotal;
}
6525
6526 template <typename Comparer>
distributionCompare(const std::vector<uint64_t> & orderedObservationsA,const std::vector<uint64_t> & orderedObservationsB,const Comparer & comparer)6527 static DistributionCompareResult distributionCompare(const std::vector<uint64_t> &orderedObservationsA,
6528 const std::vector<uint64_t> &orderedObservationsB,
6529 const Comparer &comparer)
6530 {
6531 // Mann-Whitney U test
6532
6533 const int n1 = (int)orderedObservationsA.size();
6534 const int n2 = (int)orderedObservationsB.size();
6535 std::vector<uint64_t> allSamples(n1 + n2);
6536
6537 std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin());
6538 std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1);
6539 std::sort(allSamples.begin(), allSamples.end());
6540
6541 {
6542 const float R1 = sumOfRanks(orderedObservationsA, allSamples, comparer);
6543
6544 const float U1 = (float)(n1 * n2 + n1 * (n1 + 1) / 2) - R1;
6545 const float U2 = (float)(n1 * n2) - U1;
6546 const float U = de::min(U1, U2);
6547
6548 // \note: sample sizes might not be large enough to expect normal distribution but we do it anyway
6549
6550 const float mU = (float)(n1 * n2) / 2.0f;
6551 const float sigmaU = deFloatSqrt((float)(n1 * n2 * (n1 + n2 + 1)) / 12.0f);
6552 const float z = (U - mU) / sigmaU;
6553
6554 DistributionCompareResult result;
6555
6556 result.equal = (de::abs(z) <= 1.96f); // accept within 95% confidence interval
6557 result.standardDeviations = z;
6558
6559 return result;
6560 }
6561 }
6562
/*--------------------------------------------------------------------*//*!
 * \brief Less-than predicate that treats nearby values as equivalent.
 *
 * Two values are considered equivalent (neither is less than the other)
 * when their difference is within absoluteThreshold, or within
 * relativeThreshold times either of the values. Otherwise the values are
 * ordered with operator<.
 *//*--------------------------------------------------------------------*/
template <typename T>
struct ThresholdComparer
{
    float relativeThreshold; //!< Allowed difference as a fraction of either value.
    T absoluteThreshold;     //!< Allowed difference in the units of T.

    bool operator()(const T &a, const T &b) const
    {
        const float lhs = (float)a;
        const float rhs = (float)b;

        // absolute difference of the two values
        const float distance = (lhs < rhs) ? (rhs - lhs) : (lhs - rhs);

        const bool withinAbsolute = distance <= (float)absoluteThreshold;
        const bool withinRelative = distance <= lhs * relativeThreshold || distance <= rhs * relativeThreshold;

        // values inside either threshold are equivalent, otherwise use the plain ordering
        if (withinAbsolute || withinRelative)
            return false;

        return a < b;
    }
};
6583
findStabilizationSample(uint64_t UploadWaitDrawCase::Result::* target,const char * description)6584 int UploadWaitDrawCase::findStabilizationSample(uint64_t UploadWaitDrawCase::Result::*target, const char *description)
6585 {
6586 std::vector<std::vector<uint64_t>> sampleObservations(m_numMaxSwaps + 1);
6587 ThresholdComparer<uint64_t> comparer;
6588
6589 comparer.relativeThreshold = 0.15f; // 15%
6590 comparer.absoluteThreshold = 100; // (us), assumed sampling precision
6591
6592 // get observations and order them
6593
6594 for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx)
6595 {
6596 int insertNdx = 0;
6597
6598 sampleObservations[swapNdx].resize(m_numSamplesPerSwap);
6599
6600 for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx)
6601 if (m_samples[ndx].numFrames == swapNdx)
6602 sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target;
6603
6604 DE_ASSERT(insertNdx == m_numSamplesPerSwap);
6605
6606 std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end());
6607 }
6608
6609 // find stabilization point
6610
6611 for (int sampleNdx = m_numMaxSwaps - 1; sampleNdx != -1; --sampleNdx)
6612 {
6613 // Distribution is equal to all following distributions
6614 for (int cmpTargetDistribution = sampleNdx + 1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution)
6615 {
6616 // Stable section ends here?
6617 const DistributionCompareResult result =
6618 distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer);
6619 if (!result.equal)
6620 {
6621 // Last two samples are not equal? Samples never stabilized
6622 if (sampleNdx == m_numMaxSwaps - 1)
6623 {
6624 m_testCtx.getLog() << tcu::TestLog::Message << description << ": Samples with swap count "
6625 << sampleNdx << " and " << cmpTargetDistribution
6626 << " do not seem to have the same distribution:\n"
6627 << "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6628 << "\tSwap count " << sampleNdx
6629 << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6630 << "\tSwap count " << cmpTargetDistribution
6631 << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f)
6632 << "\n"
6633 << tcu::TestLog::EndMessage;
6634 return -1;
6635 }
6636 else
6637 {
6638 m_testCtx.getLog() << tcu::TestLog::Message << description << ": Samples with swap count "
6639 << sampleNdx << " and " << cmpTargetDistribution
6640 << " do not seem to have the same distribution:\n"
6641 << "\tSamples with swap count " << sampleNdx
6642 << " are not part of the tail of stable results.\n"
6643 << "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6644 << "\tSwap count " << sampleNdx
6645 << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6646 << "\tSwap count " << cmpTargetDistribution
6647 << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f)
6648 << "\n"
6649 << tcu::TestLog::EndMessage;
6650
6651 return sampleNdx + 1;
6652 }
6653 }
6654 }
6655 }
6656
6657 m_testCtx.getLog() << tcu::TestLog::Message << description << ": All samples seem to have the same distribution"
6658 << tcu::TestLog::EndMessage;
6659
6660 // all distributions equal
6661 return 0;
6662 }
6663
checkSampleTemporalStability(uint64_t UploadWaitDrawCase::Result::* target,const char * description)6664 bool UploadWaitDrawCase::checkSampleTemporalStability(uint64_t UploadWaitDrawCase::Result::*target,
6665 const char *description)
6666 {
6667 // Try to find correlation with sample order and sample times
6668
6669 const int numDataPoints = (int)m_iterationOrder.size();
6670 std::vector<tcu::Vec2> dataPoints(m_iterationOrder.size());
6671 LineParametersWithConfidence lineFit;
6672
6673 for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
6674 {
6675 dataPoints[m_iterationOrder[ndx]].x() = (float)ndx;
6676 dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target);
6677 }
6678
6679 lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f);
6680
6681 // Difference of more than 25% of the offset along the whole sample range
6682 if (de::abs(lineFit.coefficient) * (float)numDataPoints > de::abs(lineFit.offset) * 0.25f)
6683 {
6684 m_testCtx.getLog() << tcu::TestLog::Message << description
6685 << ": Correlation with data point observation order and result time. Results are not "
6686 "temporally stable, observations are not independent.\n"
6687 << "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n"
6688 << tcu::TestLog::EndMessage;
6689
6690 return false;
6691 }
6692 else
6693 return true;
6694 }
6695
6696 } // namespace
6697
BufferDataUploadTests(Context & context)6698 BufferDataUploadTests::BufferDataUploadTests(Context &context)
6699 : TestCaseGroup(context, "data_upload", "Buffer data upload performance tests")
6700 {
6701 }
6702
~BufferDataUploadTests(void)6703 BufferDataUploadTests::~BufferDataUploadTests(void)
6704 {
6705 }
6706
init(void)6707 void BufferDataUploadTests::init(void)
6708 {
6709 static const struct BufferUsage
6710 {
6711 const char *name;
6712 uint32_t usage;
6713 bool primaryUsage;
6714 } bufferUsages[] = {
6715 {"stream_draw", GL_STREAM_DRAW, true}, {"stream_read", GL_STREAM_READ, false},
6716 {"stream_copy", GL_STREAM_COPY, false}, {"static_draw", GL_STATIC_DRAW, true},
6717 {"static_read", GL_STATIC_READ, false}, {"static_copy", GL_STATIC_COPY, false},
6718 {"dynamic_draw", GL_DYNAMIC_DRAW, true}, {"dynamic_read", GL_DYNAMIC_READ, false},
6719 {"dynamic_copy", GL_DYNAMIC_COPY, false},
6720 };
6721
6722 tcu::TestCaseGroup *const referenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Reference functions");
6723 tcu::TestCaseGroup *const functionCallGroup =
6724 new tcu::TestCaseGroup(m_testCtx, "function_call", "Function call timing");
6725 tcu::TestCaseGroup *const modifyAfterUseGroup =
6726 new tcu::TestCaseGroup(m_testCtx, "modify_after_use", "Function call time after buffer has been used");
6727 tcu::TestCaseGroup *const renderAfterUploadGroup = new tcu::TestCaseGroup(
6728 m_testCtx, "render_after_upload", "Function call time of draw commands after buffer has been modified");
6729
6730 addChild(referenceGroup);
6731 addChild(functionCallGroup);
6732 addChild(modifyAfterUseGroup);
6733 addChild(renderAfterUploadGroup);
6734
6735 // .reference
6736 {
6737 static const struct BufferSizeRange
6738 {
6739 const char *name;
6740 int minBufferSize;
6741 int maxBufferSize;
6742 int numSamples;
6743 bool largeBuffersCase;
6744 } sizeRanges[] = {
6745 {"small_buffers", 0, 1 << 18, 64, false}, // !< 0kB - 256kB
6746 {"large_buffers", 1 << 18, 1 << 24, 32, true}, // !< 256kB - 16MB
6747 };
6748
6749 for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx)
6750 {
6751 referenceGroup->addChild(new ReferenceMemcpyCase(
6752 m_context, std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(),
6753 "Test memcpy performance", sizeRanges[bufferSizeRangeNdx].minBufferSize,
6754 sizeRanges[bufferSizeRangeNdx].maxBufferSize, sizeRanges[bufferSizeRangeNdx].numSamples,
6755 sizeRanges[bufferSizeRangeNdx].largeBuffersCase));
6756 }
6757 }
6758
6759 // .function_call
6760 {
6761 const int minBufferSize = 0; // !< 0kiB
6762 const int maxBufferSize = 1 << 24; // !< 16MiB
6763 const int numDataSamples = 25;
6764 const int numMapSamples = 25;
6765
6766 tcu::TestCaseGroup *const bufferDataMethodGroup =
6767 new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData");
6768 tcu::TestCaseGroup *const bufferSubDataMethodGroup =
6769 new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData");
6770 tcu::TestCaseGroup *const mapBufferRangeMethodGroup =
6771 new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange");
6772
6773 functionCallGroup->addChild(bufferDataMethodGroup);
6774 functionCallGroup->addChild(bufferSubDataMethodGroup);
6775 functionCallGroup->addChild(mapBufferRangeMethodGroup);
6776
6777 // .buffer_data
6778 {
6779 static const struct TargetCase
6780 {
6781 tcu::TestCaseGroup *group;
6782 BufferDataUploadCase::CaseType caseType;
6783 bool allUsages;
6784 } targetCases[] = {
6785 {new tcu::TestCaseGroup(m_testCtx, "new_buffer", "Target new buffer"),
6786 BufferDataUploadCase::CASE_NEW_BUFFER, true},
6787 {new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer", "Target new unspecified buffer"),
6788 BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER, true},
6789 {new tcu::TestCaseGroup(m_testCtx, "specified_buffer", "Target new specified buffer"),
6790 BufferDataUploadCase::CASE_SPECIFIED_BUFFER, true},
6791 {new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Target buffer that was used in draw"),
6792 BufferDataUploadCase::CASE_USED_BUFFER, true},
6793 {new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer", "Target larger buffer that was used in draw"),
6794 BufferDataUploadCase::CASE_USED_LARGER_BUFFER, false},
6795 };
6796
6797 for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx)
6798 {
6799 bufferDataMethodGroup->addChild(targetCases[targetNdx].group);
6800
6801 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6802 if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages)
6803 targetCases[targetNdx].group->addChild(new BufferDataUploadCase(
6804 m_context, std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6805 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6806 minBufferSize, maxBufferSize, numDataSamples, bufferUsages[usageNdx].usage,
6807 targetCases[targetNdx].caseType));
6808 }
6809 }
6810
6811 // .buffer_sub_data
6812 {
6813 static const struct FlagCase
6814 {
6815 tcu::TestCaseGroup *group;
6816 BufferSubDataUploadCase::CaseType parentCase;
6817 bool allUsages;
6818 int flags;
6819 } flagCases[] = {
6820 {new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload", ""),
6821 BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_FULL_UPLOAD},
6822 {new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload",
6823 "Clear buffer with bufferData(...,NULL) before sub data call"),
6824 BufferSubDataUploadCase::CASE_USED_BUFFER, false,
6825 BufferSubDataUploadCase::FLAG_FULL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE},
6826 {new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload", ""),
6827 BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD},
6828 {new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload",
6829 "Clear buffer with bufferData(...,NULL) before sub data call"),
6830 BufferSubDataUploadCase::CASE_USED_BUFFER, false,
6831 BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE},
6832 };
6833
6834 for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx)
6835 {
6836 bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group);
6837
6838 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6839 if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages)
6840 flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(
6841 m_context, std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6842 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6843 minBufferSize, maxBufferSize, numDataSamples, bufferUsages[usageNdx].usage,
6844 flagCases[flagNdx].parentCase, flagCases[flagNdx].flags));
6845 }
6846 }
6847
6848 // .map_buffer_range
6849 {
6850 static const struct FlagCase
6851 {
6852 const char *name;
6853 bool usefulForUnusedBuffers;
6854 bool allUsages;
6855 int glFlags;
6856 int caseFlags;
6857 } flagCases[] = {
6858 {"flag_write_full", true, true, GL_MAP_WRITE_BIT, 0},
6859 {"flag_write_partial", true, true, GL_MAP_WRITE_BIT, MapBufferRangeCase::FLAG_PARTIAL},
6860 {"flag_read_write_full", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT, 0},
6861 {"flag_read_write_partial", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,
6862 MapBufferRangeCase::FLAG_PARTIAL},
6863 {"flag_invalidate_range_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, 0},
6864 {"flag_invalidate_range_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
6865 MapBufferRangeCase::FLAG_PARTIAL},
6866 {"flag_invalidate_buffer_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0},
6867 {"flag_invalidate_buffer_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,
6868 MapBufferRangeCase::FLAG_PARTIAL},
6869 {"flag_write_full_manual_invalidate_buffer", false, false,
6870 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, MapBufferRangeCase::FLAG_MANUAL_INVALIDATION},
6871 {"flag_write_partial_manual_invalidate_buffer", false, false,
6872 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
6873 MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION},
6874 {"flag_unsynchronized_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT, 0},
6875 {"flag_unsynchronized_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,
6876 MapBufferRangeCase::FLAG_PARTIAL},
6877 {"flag_unsynchronized_and_invalidate_buffer_full", true, false,
6878 GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0},
6879 {"flag_unsynchronized_and_invalidate_buffer_partial", true, false,
6880 GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,
6881 MapBufferRangeCase::FLAG_PARTIAL},
6882 };
6883 static const struct FlushCases
6884 {
6885 const char *name;
6886 int glFlags;
6887 int caseFlags;
6888 } flushCases[] = {
6889 {"flag_flush_explicit_map_full", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, 0},
6890 {"flag_flush_explicit_map_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,
6891 MapBufferRangeFlushCase::FLAG_PARTIAL},
6892 {"flag_flush_explicit_map_full_flush_in_parts", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,
6893 MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS},
6894 {"flag_flush_explicit_map_full_flush_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,
6895 MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL},
6896 };
6897 static const struct MapTestGroup
6898 {
6899 int flags;
6900 bool unusedBufferCase;
6901 tcu::TestCaseGroup *group;
6902 } groups[] = {
6903 {
6904 MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER,
6905 true,
6906 new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer",
6907 "Test with unused, unspecified buffers"),
6908 },
6909 {
6910 MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER,
6911 true,
6912 new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"),
6913 },
6914 {0, false,
6915 new tcu::TestCaseGroup(m_testCtx, "used_buffer",
6916 "Test with used (data has been sourced from a buffer) buffers")},
6917 };
6918
6919 // we OR same flags to both range and flushRange cases, make sure it is legal
6920 DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER ==
6921 (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER);
6922 DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER ==
6923 (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER);
6924
6925 for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx)
6926 {
6927 tcu::TestCaseGroup *const bufferTypeGroup = groups[groupNdx].group;
6928
6929 mapBufferRangeMethodGroup->addChild(bufferTypeGroup);
6930
6931 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx)
6932 {
6933 if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers)
6934 continue;
6935
6936 tcu::TestCaseGroup *const bufferUsageGroup =
6937 new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, "");
6938 bufferTypeGroup->addChild(bufferUsageGroup);
6939
6940 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6941 if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages)
6942 bufferUsageGroup->addChild(new MapBufferRangeCase(
6943 m_context, bufferUsages[usageNdx].name,
6944 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6945 minBufferSize, maxBufferSize, numMapSamples, bufferUsages[usageNdx].usage,
6946 flagCases[caseNdx].glFlags, flagCases[caseNdx].caseFlags | groups[groupNdx].flags));
6947 }
6948
6949 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx)
6950 {
6951 tcu::TestCaseGroup *const bufferUsageGroup =
6952 new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, "");
6953 bufferTypeGroup->addChild(bufferUsageGroup);
6954
6955 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6956 if (bufferUsages[usageNdx].primaryUsage)
6957 bufferUsageGroup->addChild(new MapBufferRangeFlushCase(
6958 m_context, bufferUsages[usageNdx].name,
6959 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6960 minBufferSize, maxBufferSize, numMapSamples, bufferUsages[usageNdx].usage,
6961 flushCases[caseNdx].glFlags, flushCases[caseNdx].caseFlags | groups[groupNdx].flags));
6962 }
6963 }
6964 }
6965 }
6966
6967 // .modify_after_use
6968 {
6969 const int minBufferSize = 0; // !< 0kiB
6970 const int maxBufferSize = 1 << 24; // !< 16MiB
6971
6972 static const struct Usage
6973 {
6974 const char *name;
6975 const char *description;
6976 uint32_t usage;
6977 } usages[] = {
6978 {"static_draw", "Test with GL_STATIC_DRAW", GL_STATIC_DRAW},
6979 {"dynamic_draw", "Test with GL_DYNAMIC_DRAW", GL_DYNAMIC_DRAW},
6980 {"stream_draw", "Test with GL_STREAM_DRAW", GL_STREAM_DRAW},
6981
6982 };
6983
6984 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx)
6985 {
6986 tcu::TestCaseGroup *const usageGroup =
6987 new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description);
6988 modifyAfterUseGroup->addChild(usageGroup);
6989
6990 usageGroup->addChild(new ModifyAfterWithBufferDataCase(m_context, "buffer_data",
6991 "Respecify buffer contents after use", minBufferSize,
6992 maxBufferSize, usages[usageNdx].usage, 0));
6993 usageGroup->addChild(new ModifyAfterWithBufferDataCase(
6994 m_context, "buffer_data_different_size", "Respecify buffer contents and size after use", minBufferSize,
6995 maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE));
6996 usageGroup->addChild(new ModifyAfterWithBufferDataCase(
6997 m_context, "buffer_data_repeated", "Respecify buffer contents after upload and use", minBufferSize,
6998 maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED));
6999
7000 usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
7001 m_context, "buffer_sub_data_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize,
7002 usages[usageNdx].usage, 0));
7003 usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
7004 m_context, "buffer_sub_data_partial", "Respecify buffer contents partially use", minBufferSize,
7005 maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
7006 usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
7007 m_context, "buffer_sub_data_full_repeated", "Respecify buffer contents after upload and use",
7008 minBufferSize, maxBufferSize, usages[usageNdx].usage,
7009 ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED));
7010 usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
7011 m_context, "buffer_sub_data_partial_repeated", "Respecify buffer contents partially upload and use",
7012 minBufferSize, maxBufferSize, usages[usageNdx].usage,
7013 ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED |
7014 ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
7015
7016 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7017 m_context, "map_flag_write_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize,
7018 usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT));
7019 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7020 m_context, "map_flag_write_partial", "Respecify buffer contents partially after use", minBufferSize,
7021 maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7022 GL_MAP_WRITE_BIT));
7023 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7024 m_context, "map_flag_read_write_full", "Respecify buffer contents after use", minBufferSize,
7025 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
7026 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7027 m_context, "map_flag_read_write_partial", "Respecify buffer contents partially after use",
7028 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7029 GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
7030 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7031 m_context, "map_flag_invalidate_range_full", "Respecify buffer contents after use", minBufferSize,
7032 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
7033 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7034 m_context, "map_flag_invalidate_range_partial", "Respecify buffer contents partially after use",
7035 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7036 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
7037 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7038 m_context, "map_flag_invalidate_buffer_full", "Respecify buffer contents after use", minBufferSize,
7039 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
7040 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7041 m_context, "map_flag_invalidate_buffer_partial", "Respecify buffer contents partially after use",
7042 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7043 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
7044 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7045 m_context, "map_flag_unsynchronized_full", "Respecify buffer contents after use", minBufferSize,
7046 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
7047 usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7048 m_context, "map_flag_unsynchronized_partial", "Respecify buffer contents partially after use",
7049 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7050 GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
7051
7052 usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase(
7053 m_context, "map_flag_flush_explicit_full", "Respecify buffer contents after use", minBufferSize,
7054 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
7055 usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase(
7056 m_context, "map_flag_flush_explicit_partial", "Respecify buffer contents partially after use",
7057 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL,
7058 GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
7059 }
7060 }
7061
7062 // .render_after_upload
7063 {
7064 // .reference
7065 {
7066 tcu::TestCaseGroup *const renderReferenceGroup =
7067 new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results");
7068 renderAfterUploadGroup->addChild(renderReferenceGroup);
7069
7070 // .draw
7071 {
7072 tcu::TestCaseGroup *const drawGroup =
7073 new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers");
7074 renderReferenceGroup->addChild(drawGroup);
7075
7076 // Time consumed by readPixels
7077 drawGroup->addChild(new ReferenceReadPixelsTimeCase(
7078 m_context, "read_pixels", "Measure time consumed by readPixels() function call"));
7079
7080 // Time consumed by rendering
7081 drawGroup->addChild(new ReferenceRenderTimeCase(m_context, "draw_arrays",
7082 "Measure time consumed by drawArrays() function call",
7083 DRAWMETHOD_DRAW_ARRAYS));
7084 drawGroup->addChild(new ReferenceRenderTimeCase(m_context, "draw_elements",
7085 "Measure time consumed by drawElements() function call",
7086 DRAWMETHOD_DRAW_ELEMENTS));
7087 }
7088
7089 // .draw_upload_draw
7090 {
7091 static const struct
7092 {
7093 const char *name;
7094 const char *description;
7095 DrawMethod drawMethod;
7096 TargetBuffer targetBuffer;
7097 bool partial;
7098 } uploadTargets[] = {
7099 {"draw_arrays_upload_vertices",
7100 "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels "
7101 "function calls.",
7102 DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false},
7103 {"draw_arrays_upload_vertices_partial",
7104 "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and "
7105 "readPixels function calls.",
7106 DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true},
7107 {"draw_elements_upload_vertices",
7108 "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and "
7109 "readPixels function calls.",
7110 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false},
7111 {"draw_elements_upload_indices",
7112 "Measure time consumed by drawElements, index upload, another drawElements, and readPixels "
7113 "function calls.",
7114 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false},
7115 {"draw_elements_upload_indices_partial",
7116 "Measure time consumed by drawElements, partial index upload, another drawElements, and "
7117 "readPixels function calls.",
7118 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true},
7119 };
7120 static const struct
7121 {
7122 const char *name;
7123 const char *description;
7124 UploadMethod uploadMethod;
7125 BufferInUseRenderTimeCase::MapFlags mapFlags;
7126 bool supportsPartialUpload;
7127 } uploadMethods[] = {
7128 {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE,
7129 false},
7130 {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA,
7131 BufferInUseRenderTimeCase::MAPFLAG_NONE, true},
7132 {"map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
7133 BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true},
7134 {"map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
7135 BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false},
7136 };
7137
7138 tcu::TestCaseGroup *const drawUploadDrawGroup = new tcu::TestCaseGroup(
7139 m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw");
7140 renderReferenceGroup->addChild(drawUploadDrawGroup);
7141
7142 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
7143 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods);
7144 ++uploadMethodNdx)
7145 {
7146 const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
7147 uploadMethods[uploadMethodNdx].name;
7148
7149 if (uploadTargets[uploadTargetNdx].partial &&
7150 !uploadMethods[uploadMethodNdx].supportsPartialUpload)
7151 continue;
7152
7153 drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(
7154 m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7155 uploadTargets[uploadTargetNdx].drawMethod, uploadMethods[uploadMethodNdx].mapFlags,
7156 uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod,
7157 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
7158 BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER));
7159 }
7160 }
7161 }
7162
7163 // .upload_unrelated_and_draw
7164 {
7165 static const struct
7166 {
7167 const char *name;
7168 const char *description;
7169 DrawMethod drawMethod;
7170 } drawMethods[] = {
7171 {"draw_arrays", "drawArrays", DRAWMETHOD_DRAW_ARRAYS},
7172 {"draw_elements", "drawElements", DRAWMETHOD_DRAW_ELEMENTS},
7173 };
7174
7175 static const struct
7176 {
7177 const char *name;
7178 UploadMethod uploadMethod;
7179 } uploadMethods[] = {
7180 {"buffer_data", UPLOADMETHOD_BUFFER_DATA},
7181 {"buffer_sub_data", UPLOADMETHOD_BUFFER_SUB_DATA},
7182 {"map_buffer_range", UPLOADMETHOD_MAP_BUFFER_RANGE},
7183 };
7184
7185 tcu::TestCaseGroup *const uploadUnrelatedGroup = new tcu::TestCaseGroup(
7186 m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload");
7187 renderAfterUploadGroup->addChild(uploadUnrelatedGroup);
7188
7189 for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx)
7190 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
7191 {
7192 const std::string name = std::string() + drawMethods[drawMethodNdx].name +
7193 "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name;
7194 const std::string desc = std::string() + "Measure time consumed by " +
7195 drawMethods[drawMethodNdx].description +
7196 " function call after an unrelated upload";
7197
7198 // Time consumed by rendering command after an unrelated upload
7199
7200 uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(
7201 m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod,
7202 uploadMethods[uploadMethodNdx].uploadMethod));
7203 }
7204 }
7205
7206 // .upload_and_draw
7207 {
7208 static const struct
7209 {
7210 const char *name;
7211 const char *description;
7212 BufferState bufferState;
7213 UnrelatedBufferType unrelatedBuffer;
7214 bool supportsPartialUpload;
7215 } bufferConfigs[] = {
7216 {"used_buffer", "Upload to an used buffer", BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_NONE, true},
7217 {"new_buffer", "Upload to a new buffer", BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_NONE, false},
7218 {"used_buffer_and_unrelated_upload", "Upload to an used buffer and an unrelated buffer and then draw",
7219 BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_VERTEX, true},
7220 {"new_buffer_and_unrelated_upload", "Upload to a new buffer and an unrelated buffer and then draw",
7221 BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_VERTEX, false},
7222 };
7223
7224 tcu::TestCaseGroup *const uploadAndDrawGroup = new tcu::TestCaseGroup(
7225 m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers");
7226 renderAfterUploadGroup->addChild(uploadAndDrawGroup);
7227
7228 // .used_buffer
7229 // .new_buffer
7230 // .used_buffer_and_unrelated_upload
7231 // .new_buffer_and_unrelated_upload
7232 for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx)
7233 {
7234 static const struct
7235 {
7236 const char *name;
7237 const char *description;
7238 DrawMethod drawMethod;
7239 TargetBuffer targetBuffer;
7240 bool partial;
7241 } uploadTargets[] = {
7242 {"draw_arrays_upload_vertices",
7243 "Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls",
7244 DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false},
7245 {"draw_arrays_upload_vertices_partial",
7246 "Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function "
7247 "calls",
7248 DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true},
7249 {"draw_elements_upload_vertices",
7250 "Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls",
7251 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false},
7252 {"draw_elements_upload_indices",
7253 "Measure time consumed by index upload, drawElements, and readPixels function calls",
7254 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false},
7255 {"draw_elements_upload_indices_partial",
7256 "Measure time consumed by partial index upload, drawElements, and readPixels function calls",
7257 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true},
7258 };
7259 static const struct
7260 {
7261 const char *name;
7262 const char *description;
7263 UploadMethod uploadMethod;
7264 bool supportsPartialUpload;
7265 } uploadMethods[] = {
7266 {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, false},
7267 {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA, true},
7268 {"map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, true},
7269 };
7270
7271 tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name,
7272 bufferConfigs[stateNdx].description);
7273 uploadAndDrawGroup->addChild(group);
7274
7275 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
7276 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods);
7277 ++uploadMethodNdx)
7278 {
7279 const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
7280 uploadMethods[uploadMethodNdx].name;
7281
7282 if (uploadTargets[uploadTargetNdx].partial &&
7283 !uploadMethods[uploadMethodNdx].supportsPartialUpload)
7284 continue;
7285 if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload)
7286 continue;
7287
7288 // Don't log unrelated buffer information to samples if there is no such buffer
7289
7290 if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE)
7291 {
7292 typedef UploadRenderReadDuration SampleType;
7293 typedef GenericUploadRenderTimeCase<SampleType> TestType;
7294
7295 group->addChild(new TestType(
7296 m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7297 uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer,
7298 uploadMethods[uploadMethodNdx].uploadMethod, bufferConfigs[stateNdx].bufferState,
7299 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
7300 bufferConfigs[stateNdx].unrelatedBuffer));
7301 }
7302 else
7303 {
7304 typedef UploadRenderReadDurationWithUnrelatedUploadSize SampleType;
7305 typedef GenericUploadRenderTimeCase<SampleType> TestType;
7306
7307 group->addChild(new TestType(
7308 m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7309 uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer,
7310 uploadMethods[uploadMethodNdx].uploadMethod, bufferConfigs[stateNdx].bufferState,
7311 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
7312 bufferConfigs[stateNdx].unrelatedBuffer));
7313 }
7314 }
7315 }
7316 }
7317
7318 // .draw_modify_draw
7319 {
7320 static const struct
7321 {
7322 const char *name;
7323 const char *description;
7324 DrawMethod drawMethod;
7325 TargetBuffer targetBuffer;
7326 bool partial;
7327 } uploadTargets[] = {
7328 {"draw_arrays_upload_vertices",
7329 "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels "
7330 "function calls.",
7331 DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false},
7332 {"draw_arrays_upload_vertices_partial",
7333 "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and "
7334 "readPixels function calls.",
7335 DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true},
7336 {"draw_elements_upload_vertices",
7337 "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels "
7338 "function calls.",
7339 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false},
7340 {"draw_elements_upload_indices",
7341 "Measure time consumed by drawElements, index upload, another drawElements, and readPixels function "
7342 "calls.",
7343 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false},
7344 {"draw_elements_upload_indices_partial",
7345 "Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels "
7346 "function calls.",
7347 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true},
7348 };
7349 static const struct
7350 {
7351 const char *name;
7352 const char *description;
7353 UploadMethod uploadMethod;
7354 BufferInUseRenderTimeCase::MapFlags mapFlags;
7355 bool supportsPartialUpload;
7356 } uploadMethods[] = {
7357 {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, false},
7358 {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA,
7359 BufferInUseRenderTimeCase::MAPFLAG_NONE, true},
7360 {"map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
7361 BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true},
7362 {"map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
7363 BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false},
7364 };
7365
7366 tcu::TestCaseGroup *const drawModifyDrawGroup = new tcu::TestCaseGroup(
7367 m_testCtx, "draw_modify_draw",
7368 "Time used in rendering functions with modified buffers while original buffer is still in use");
7369 renderAfterUploadGroup->addChild(drawModifyDrawGroup);
7370
7371 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
7372 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
7373 {
7374 const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
7375 uploadMethods[uploadMethodNdx].name;
7376
7377 if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
7378 continue;
7379
7380 drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(
7381 m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7382 uploadTargets[uploadTargetNdx].drawMethod, uploadMethods[uploadMethodNdx].mapFlags,
7383 uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod,
7384 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
7385 BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER));
7386 }
7387 }
7388
7389 // .upload_wait_draw
7390 {
7391 static const struct
7392 {
7393 const char *name;
7394 const char *description;
7395 BufferState bufferState;
7396 } bufferStates[] = {
7397 {"new_buffer", "Uploading to just generated name", BUFFERSTATE_NEW},
7398 {"used_buffer", "Uploading to a used buffer", BUFFERSTATE_EXISTING},
7399 };
7400 static const struct
7401 {
7402 const char *name;
7403 const char *description;
7404 DrawMethod drawMethod;
7405 TargetBuffer targetBuffer;
7406 } uploadTargets[] = {
7407 {"draw_arrays_vertices", "Upload vertex data, draw with drawArrays", DRAWMETHOD_DRAW_ARRAYS,
7408 TARGETBUFFER_VERTEX},
7409 {"draw_elements_vertices", "Upload vertex data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS,
7410 TARGETBUFFER_VERTEX},
7411 {"draw_elements_indices", "Upload index data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS,
7412 TARGETBUFFER_INDEX},
7413 };
7414 static const struct
7415 {
7416 const char *name;
7417 const char *description;
7418 UploadMethod uploadMethod;
7419 } uploadMethods[] = {
7420 {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA},
7421 {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA},
7422 {"map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE},
7423 };
7424
7425 tcu::TestCaseGroup *const uploadSwapDrawGroup = new tcu::TestCaseGroup(
7426 m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago");
7427 renderAfterUploadGroup->addChild(uploadSwapDrawGroup);
7428
7429 for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx)
7430 {
7431 tcu::TestCaseGroup *const bufferGroup = new tcu::TestCaseGroup(
7432 m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description);
7433 uploadSwapDrawGroup->addChild(bufferGroup);
7434
7435 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
7436 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods);
7437 ++uploadMethodNdx)
7438 {
7439 const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
7440 uploadMethods[uploadMethodNdx].name;
7441
7442 bufferGroup->addChild(new UploadWaitDrawCase(
7443 m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7444 uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer,
7445 uploadMethods[uploadMethodNdx].uploadMethod, bufferStates[bufferStateNdx].bufferState));
7446 }
7447 }
7448 }
7449 }
7450 }
7451
7452 } // namespace Performance
7453 } // namespace gles3
7454 } // namespace deqp
7455