xref: /aosp_15_r20/external/deqp/modules/gles3/performance/es3pBufferDataUploadTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Buffer data upload performance tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es3pBufferDataUploadTests.hpp"
25 #include "glsCalibration.hpp"
26 #include "tcuTestLog.hpp"
27 #include "tcuVectorUtil.hpp"
28 #include "tcuSurface.hpp"
29 #include "tcuCPUWarmup.hpp"
30 #include "tcuRenderTarget.hpp"
31 #include "gluRenderContext.hpp"
32 #include "gluShaderProgram.hpp"
33 #include "gluStrUtil.hpp"
34 #include "gluPixelTransfer.hpp"
35 #include "gluObjectWrapper.hpp"
36 #include "glwFunctions.hpp"
37 #include "glwEnums.hpp"
38 #include "deClock.h"
39 #include "deMath.h"
40 #include "deStringUtil.hpp"
41 #include "deRandom.hpp"
42 #include "deMemory.h"
43 #include "deThread.h"
44 #include "deMeta.hpp"
45 
46 #include <algorithm>
47 #include <iomanip>
48 #include <limits>
49 
50 namespace deqp
51 {
52 namespace gles3
53 {
54 namespace Performance
55 {
56 namespace
57 {
58 
59 using de::meta::EnableIf;
60 using de::meta::Not;
61 using gls::LineParametersWithConfidence;
62 using gls::theilSenSiegelLinearRegression;
63 
// GLSL ES 3.00 vertex shader source: forwards the a_position attribute to gl_Position unchanged.
static const char *const s_minimalVertexShader = "#version 300 es\n"
                                                 "in highp vec4 a_position;\n"
                                                 "void main (void)\n"
                                                 "{\n"
                                                 "    gl_Position = a_position;\n"
                                                 "}\n";
70 
// GLSL ES 3.00 fragment shader source: writes a constant opaque red to output location 0.
// NOTE(review): the identifier is spelled "Fragnent"; it is presumably referenced under this
// spelling elsewhere in the file, so it is left untouched here.
static const char *const s_minimalFragnentShader = "#version 300 es\n"
                                                   "layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
                                                   "void main (void)\n"
                                                   "{\n"
                                                   "    dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
                                                   "}\n";
77 
// GLSL ES 3.00 vertex shader source: forwards a_position to gl_Position and passes the
// a_color attribute on to the fragment stage through the v_color varying.
static const char *const s_colorVertexShader = "#version 300 es\n"
                                               "in highp vec4 a_position;\n"
                                               "in highp vec4 a_color;\n"
                                               "out highp vec4 v_color;\n"
                                               "void main (void)\n"
                                               "{\n"
                                               "    gl_Position = a_position;\n"
                                               "    v_color = a_color;\n"
                                               "}\n";
87 
// GLSL ES 3.00 fragment shader source: writes the interpolated v_color varying to output location 0.
static const char *const s_colorFragmentShader = "#version 300 es\n"
                                                 "layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
                                                 "in mediump vec4 v_color;\n"
                                                 "void main (void)\n"
                                                 "{\n"
                                                 "    dEQP_FragColor = v_color;\n"
                                                 "}\n";
95 
// Timing record of a sample that consists of a single timed operation.
// calculateBasicStatistics() treats fitResponseDuration as microseconds (it divides by 1e6 to
// get seconds); totalDuration presumably uses the same unit - the code filling it is not in view.
struct SingleOperationDuration
{
    uint64_t totalDuration;
    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
101 
// Timing record of a map-based upload sample with separate map / unmap / write / alloc parts.
// Each part is summarized individually by calculateSampleStatistics().
// NOTE(review): what exactly each part measures is inferred from the member names; the
// sampling code is outside this view.
struct MapBufferRangeDuration
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t allocDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
112 
// Same as MapBufferRangeDuration but without an allocation part (no allocDuration member).
struct MapBufferRangeDurationNoAlloc
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
122 
// Timing record of a map-based upload sample that additionally tracks a flush part.
// map / unmap / write / flush / alloc are each summarized by calculateSampleStatistics().
struct MapBufferRangeFlushDuration
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t flushDuration;
    uint64_t allocDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
134 
// Same as MapBufferRangeFlushDuration but without an allocation part (no allocDuration member).
struct MapBufferRangeFlushDurationNoAlloc
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t flushDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
145 
// Timing record of a render + read sample. render, read and total are summarized separately
// by calculateSampleStatistics().
// NOTE(review): renderReadDuration is presumably the combined render+read time; confirm
// against the sampling code, which is outside this view.
struct RenderReadDuration
{
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t renderReadDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
155 
// Timing record with the same members as RenderReadDuration; kept as a distinct type so that
// it can have its own SampleTypeTraits specialization and calculateSampleStatistics() overload.
struct UnrelatedUploadRenderReadDuration
{
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t renderReadDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
165 
// Timing record of an upload + render + read sample.
// NOTE(review): per-member meaning is inferred from the names; the sampling code is outside
// this view.
struct UploadRenderReadDuration
{
    uint64_t uploadDuration;
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
176 
// Timing record with the same members as UploadRenderReadDuration; the distinct type selects
// the SampleTypeTraits specialization that sets LOG_UNRELATED_UPLOAD_SIZE to 1.
struct UploadRenderReadDurationWithUnrelatedUploadSize
{
    uint64_t uploadDuration;
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
187 
// Timing record of a render + upload + render + read sample (two separately timed renders).
// NOTE(review): per-member meaning is inferred from the names; the sampling code is outside
// this view.
struct RenderUploadRenderReadDuration
{
    uint64_t firstRenderDuration;
    uint64_t uploadDuration;
    uint64_t secondRenderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration; // response value used for the line fit and the basic statistics
};
199 
// One upload measurement: the sizes involved plus the timing record of type SampleT.
// writtenSize is the predictor used by calculateBasicTransferStatistics().
// NOTE(review): the sizes are presumably byte counts - confirm against the sampling code.
template <typename SampleT>
struct UploadSampleResult
{
    using SampleType = SampleT; // timing record type; used to form member pointers into 'duration'

    int bufferSize;
    int allocatedSize;
    int writtenSize;
    SampleT duration;
};
210 
// One render measurement: the sizes involved, the vertex count and the timing record of
// type SampleT. renderDataSize is the predictor used by calculateBasicRenderStatistics().
// NOTE(review): the sizes are presumably byte counts - confirm against the sampling code.
template <typename SampleT>
struct RenderSampleResult
{
    using SampleType = SampleT; // timing record type; used to form member pointers into 'duration'

    int uploadedDataSize;
    int renderDataSize;
    int unrelatedDataSize;
    int numVertices;
    SampleType duration;
};
222 
// Order statistics of one timed quantity over all samples. Filled from the sorted sample
// values: front(), back() and linearSample() at positions 0.5, 0.1 and 0.9.
struct SingleOperationStatistics
{
    float minTime;
    float maxTime;
    float medianTime;
    float min2DecileTime; // !< minimum value in the 2nd decile
    float max9DecileTime; // !< maximum value in the 9th decile
};
231 
// Statistics for single-operation samples. All members are filled by calculateBasicStatistics():
//  - result:      order statistics of fitResponseDuration
//  - medianRate:  median of (predictor / duration in seconds)
//  - *DiffTime:   (actual - predicted) against the fitted line: maximum, 0.9 position, median
//  - *RelDiffTime: the same difference divided by the prediction (0 when the prediction is < 1)
struct SingleCallStatistics
{
    SingleOperationStatistics result;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
245 
// Statistics for map-based upload samples. map / unmap / write / alloc hold per-part order
// statistics; the remaining members are filled by calculateBasicStatistics() from
// fitResponseDuration and the fitted line.
// The calculateSampleStatistics() overload for MapBufferRangeDurationNoAlloc does not
// assign 'alloc'.
struct MapCallStatistics
{
    SingleOperationStatistics map;
    SingleOperationStatistics unmap;
    SingleOperationStatistics write;
    SingleOperationStatistics alloc;
    SingleOperationStatistics result;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
263 
// Statistics for map-based upload samples with a flush part. map / unmap / write / flush /
// alloc hold per-part order statistics; the remaining members are filled by
// calculateBasicStatistics() from fitResponseDuration and the fitted line.
// The calculateSampleStatistics() overload for MapBufferRangeFlushDurationNoAlloc does not
// assign 'alloc'.
struct MapFlushCallStatistics
{
    SingleOperationStatistics map;
    SingleOperationStatistics unmap;
    SingleOperationStatistics write;
    SingleOperationStatistics flush;
    SingleOperationStatistics alloc;
    SingleOperationStatistics result;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
282 
// Statistics for render + read samples. render / read / total hold order statistics of the
// corresponding durations; 'result' and the remaining members are filled by
// calculateBasicStatistics() from fitResponseDuration and the fitted line.
struct RenderReadStatistics
{
    SingleOperationStatistics render;
    SingleOperationStatistics read;
    SingleOperationStatistics result;
    SingleOperationStatistics total;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
299 
// Statistics type selected by the SampleTypeTraits of the upload + render + read sample types.
// The member layout mirrors RenderReadStatistics with an additional 'upload' entry.
// NOTE(review): the code that fills upload / render / read / total is outside this view.
struct UploadRenderReadStatistics
{
    SingleOperationStatistics upload;
    SingleOperationStatistics render;
    SingleOperationStatistics read;
    SingleOperationStatistics result;
    SingleOperationStatistics total;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
317 
// Statistics type selected by SampleTypeTraits<RenderUploadRenderReadDuration>; has separate
// entries for the first and the second render.
// NOTE(review): the code that fills the per-part entries is outside this view.
struct RenderUploadRenderReadStatistics
{
    SingleOperationStatistics firstRender;
    SingleOperationStatistics upload;
    SingleOperationStatistics secondRender;
    SingleOperationStatistics read;
    SingleOperationStatistics result;
    SingleOperationStatistics total;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};
336 
// Primary template of the per-sample-type traits. It is intentionally empty: every supported
// duration type provides an explicit specialization with a StatsType alias and HAS_* / LOG_*
// compile-time flags.
template <typename T>
struct SampleTypeTraits
{
};
341 
342 template <>
343 struct SampleTypeTraits<SingleOperationDuration>
344 {
345     typedef SingleCallStatistics StatsType;
346 
347     enum
348     {
349         HAS_MAP_STATS = 0
350     };
351     enum
352     {
353         HAS_UNMAP_STATS = 0
354     };
355     enum
356     {
357         HAS_WRITE_STATS = 0
358     };
359     enum
360     {
361         HAS_FLUSH_STATS = 0
362     };
363     enum
364     {
365         HAS_ALLOC_STATS = 0
366     };
367     enum
368     {
369         LOG_CONTRIBUTIONS = 0
370     };
371 };
372 
373 template <>
374 struct SampleTypeTraits<MapBufferRangeDuration>
375 {
376     typedef MapCallStatistics StatsType;
377 
378     enum
379     {
380         HAS_MAP_STATS = 1
381     };
382     enum
383     {
384         HAS_UNMAP_STATS = 1
385     };
386     enum
387     {
388         HAS_WRITE_STATS = 1
389     };
390     enum
391     {
392         HAS_FLUSH_STATS = 0
393     };
394     enum
395     {
396         HAS_ALLOC_STATS = 1
397     };
398     enum
399     {
400         LOG_CONTRIBUTIONS = 1
401     };
402 };
403 
404 template <>
405 struct SampleTypeTraits<MapBufferRangeDurationNoAlloc>
406 {
407     typedef MapCallStatistics StatsType;
408 
409     enum
410     {
411         HAS_MAP_STATS = 1
412     };
413     enum
414     {
415         HAS_UNMAP_STATS = 1
416     };
417     enum
418     {
419         HAS_WRITE_STATS = 1
420     };
421     enum
422     {
423         HAS_FLUSH_STATS = 0
424     };
425     enum
426     {
427         HAS_ALLOC_STATS = 0
428     };
429     enum
430     {
431         LOG_CONTRIBUTIONS = 1
432     };
433 };
434 
435 template <>
436 struct SampleTypeTraits<MapBufferRangeFlushDuration>
437 {
438     typedef MapFlushCallStatistics StatsType;
439 
440     enum
441     {
442         HAS_MAP_STATS = 1
443     };
444     enum
445     {
446         HAS_UNMAP_STATS = 1
447     };
448     enum
449     {
450         HAS_WRITE_STATS = 1
451     };
452     enum
453     {
454         HAS_FLUSH_STATS = 1
455     };
456     enum
457     {
458         HAS_ALLOC_STATS = 1
459     };
460     enum
461     {
462         LOG_CONTRIBUTIONS = 1
463     };
464 };
465 
466 template <>
467 struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc>
468 {
469     typedef MapFlushCallStatistics StatsType;
470 
471     enum
472     {
473         HAS_MAP_STATS = 1
474     };
475     enum
476     {
477         HAS_UNMAP_STATS = 1
478     };
479     enum
480     {
481         HAS_WRITE_STATS = 1
482     };
483     enum
484     {
485         HAS_FLUSH_STATS = 1
486     };
487     enum
488     {
489         HAS_ALLOC_STATS = 0
490     };
491     enum
492     {
493         LOG_CONTRIBUTIONS = 1
494     };
495 };
496 
497 template <>
498 struct SampleTypeTraits<RenderReadDuration>
499 {
500     typedef RenderReadStatistics StatsType;
501 
502     enum
503     {
504         HAS_RENDER_STATS = 1
505     };
506     enum
507     {
508         HAS_READ_STATS = 1
509     };
510     enum
511     {
512         HAS_UPLOAD_STATS = 0
513     };
514     enum
515     {
516         HAS_TOTAL_STATS = 1
517     };
518     enum
519     {
520         HAS_FIRST_RENDER_STATS = 0
521     };
522     enum
523     {
524         HAS_SECOND_RENDER_STATS = 0
525     };
526 
527     enum
528     {
529         LOG_CONTRIBUTIONS = 1
530     };
531 };
532 
533 template <>
534 struct SampleTypeTraits<UnrelatedUploadRenderReadDuration>
535 {
536     typedef RenderReadStatistics StatsType;
537 
538     enum
539     {
540         HAS_RENDER_STATS = 1
541     };
542     enum
543     {
544         HAS_READ_STATS = 1
545     };
546     enum
547     {
548         HAS_UPLOAD_STATS = 0
549     };
550     enum
551     {
552         HAS_TOTAL_STATS = 1
553     };
554     enum
555     {
556         HAS_FIRST_RENDER_STATS = 0
557     };
558     enum
559     {
560         HAS_SECOND_RENDER_STATS = 0
561     };
562 
563     enum
564     {
565         LOG_CONTRIBUTIONS = 1
566     };
567 };
568 
569 template <>
570 struct SampleTypeTraits<UploadRenderReadDuration>
571 {
572     typedef UploadRenderReadStatistics StatsType;
573 
574     enum
575     {
576         HAS_RENDER_STATS = 1
577     };
578     enum
579     {
580         HAS_READ_STATS = 1
581     };
582     enum
583     {
584         HAS_UPLOAD_STATS = 1
585     };
586     enum
587     {
588         HAS_TOTAL_STATS = 1
589     };
590     enum
591     {
592         HAS_FIRST_RENDER_STATS = 0
593     };
594     enum
595     {
596         HAS_SECOND_RENDER_STATS = 0
597     };
598 
599     enum
600     {
601         LOG_CONTRIBUTIONS = 1
602     };
603     enum
604     {
605         LOG_UNRELATED_UPLOAD_SIZE = 0
606     };
607 };
608 
609 template <>
610 struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize>
611 {
612     typedef UploadRenderReadStatistics StatsType;
613 
614     enum
615     {
616         HAS_RENDER_STATS = 1
617     };
618     enum
619     {
620         HAS_READ_STATS = 1
621     };
622     enum
623     {
624         HAS_UPLOAD_STATS = 1
625     };
626     enum
627     {
628         HAS_TOTAL_STATS = 1
629     };
630     enum
631     {
632         HAS_FIRST_RENDER_STATS = 0
633     };
634     enum
635     {
636         HAS_SECOND_RENDER_STATS = 0
637     };
638 
639     enum
640     {
641         LOG_CONTRIBUTIONS = 1
642     };
643     enum
644     {
645         LOG_UNRELATED_UPLOAD_SIZE = 1
646     };
647 };
648 
649 template <>
650 struct SampleTypeTraits<RenderUploadRenderReadDuration>
651 {
652     typedef RenderUploadRenderReadStatistics StatsType;
653 
654     enum
655     {
656         HAS_RENDER_STATS = 0
657     };
658     enum
659     {
660         HAS_READ_STATS = 1
661     };
662     enum
663     {
664         HAS_UPLOAD_STATS = 1
665     };
666     enum
667     {
668         HAS_TOTAL_STATS = 1
669     };
670     enum
671     {
672         HAS_FIRST_RENDER_STATS = 1
673     };
674     enum
675     {
676         HAS_SECOND_RENDER_STATS = 1
677     };
678 
679     enum
680     {
681         LOG_CONTRIBUTIONS = 1
682     };
683     enum
684     {
685         LOG_UNRELATED_UPLOAD_SIZE = 1
686     };
687 };
688 
// Transfer-rate figures produced by analysing a set of upload samples.
// NOTE(review): units and the exact meaning of "at range" / "at infinity" are defined by the
// analysis code, which is outside this view.
struct UploadSampleAnalyzeResult
{
    float transferRateMedian;
    float transferRateAtRange;
    float transferRateAtInfinity;
};
695 
// Render-rate figures produced by analysing a set of render samples.
// NOTE(review): units and the exact meaning of "at range" / "at infinity" are defined by the
// analysis code, which is outside this view.
struct RenderSampleAnalyzeResult
{
    float renderRateMedian;
    float renderRateAtRange;
    float renderRateAtInfinity;
};
702 
// Exception type for a failed buffer unmap. It carries no state of its own and inherits
// what() from std::exception unchanged.
class UnmapFailureError : public std::exception
{
public:
    UnmapFailureError() = default;
};
710 
getHumanReadableByteSize(int numBytes)711 static std::string getHumanReadableByteSize(int numBytes)
712 {
713     std::ostringstream buf;
714 
715     if (numBytes < 1024)
716         buf << numBytes << " byte(s)";
717     else if (numBytes < 1024 * 1024)
718         buf << de::floatToString((float)numBytes / 1024.0f, 1) << " KiB";
719     else
720         buf << de::floatToString((float)numBytes / 1024.0f / 1024.0f, 1) << " MiB";
721 
722     return buf.str();
723 }
724 
medianTimeMemcpy(void * dst,const void * src,int numBytes)725 static uint64_t medianTimeMemcpy(void *dst, const void *src, int numBytes)
726 {
727     // Time used by memcpy is assumed to be asymptotically linear
728 
729     // With large numBytes, the probability of context switch or other random
730     // event is high. Apply memcpy in parts and report how much time would
731     // memcpy have used with the median transfer rate.
732 
733     // Less than 1MiB, no need to do anything special
734     if (numBytes < 1048576)
735     {
736         uint64_t startTime;
737         uint64_t endTime;
738 
739         deYield();
740 
741         startTime = deGetMicroseconds();
742         deMemcpy(dst, src, numBytes);
743         endTime = deGetMicroseconds();
744 
745         return endTime - startTime;
746     }
747     else
748     {
749         // Do memcpy in multiple parts
750 
751         const int numSections  = 5;
752         const int sectionAlign = 16;
753 
754         int sectionStarts[numSections + 1];
755         int sectionLens[numSections];
756         uint64_t sectionTimes[numSections];
757         uint64_t medianTime;
758         uint64_t bestTime = 0;
759 
760         for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
761             sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign);
762         sectionStarts[numSections] = numBytes;
763 
764         for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
765             sectionLens[sectionNdx] = sectionStarts[sectionNdx + 1] - sectionStarts[sectionNdx];
766 
767         // Memcpy is usually called after mapbuffer range which may take
768         // a lot of time. To prevent power management from kicking in during
769         // copy, warm up more.
770         {
771             deYield();
772             tcu::warmupCPU();
773             deYield();
774         }
775 
776         for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
777         {
778             uint64_t startTime;
779             uint64_t endTime;
780 
781             startTime = deGetMicroseconds();
782             deMemcpy((uint8_t *)dst + sectionStarts[sectionNdx], (const uint8_t *)src + sectionStarts[sectionNdx],
783                      sectionLens[sectionNdx]);
784             endTime = deGetMicroseconds();
785 
786             sectionTimes[sectionNdx] = endTime - startTime;
787 
788             if (!bestTime || sectionTimes[sectionNdx] < bestTime)
789                 bestTime = sectionTimes[sectionNdx];
790 
791             // Detect if write takes 50% longer than it should, and warm up if that happened
792             if (sectionNdx != numSections - 1 && (float)sectionTimes[sectionNdx] > 1.5f * (float)bestTime)
793             {
794                 deYield();
795                 tcu::warmupCPU();
796                 deYield();
797             }
798         }
799 
800         std::sort(sectionTimes, sectionTimes + numSections);
801 
802         if ((numSections % 2) == 0)
803             medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2;
804         else
805             medianTime = sectionTimes[numSections / 2];
806 
807         return medianTime * numSections;
808     }
809 }
810 
busyworkCalculation(float initial,int workSize)811 static float busyworkCalculation(float initial, int workSize)
812 {
813     float a = initial;
814     int b   = 123;
815 
816     for (int ndx = 0; ndx < workSize; ++ndx)
817     {
818         a = deFloatCos(a + (float)b);
819         b = (b + 63) % 107 + de::abs((int)(a * 10.0f));
820     }
821 
822     return a + (float)b;
823 }
824 
busyWait(int microseconds)825 static void busyWait(int microseconds)
826 {
827     const uint64_t maxSingleWaitTime = 1000; // 1ms
828     const uint64_t endTime           = deGetMicroseconds() + microseconds;
829     float unused                     = *tcu::warmupCPUInternal::g_unused.m_v;
830     int workSize                     = 500;
831 
832     // exponentially increase work, cap to 1ms
833     while (deGetMicroseconds() < endTime)
834     {
835         const uint64_t startTime = deGetMicroseconds();
836         uint64_t totalTime;
837 
838         unused = busyworkCalculation(unused, workSize);
839 
840         totalTime = deGetMicroseconds() - startTime;
841 
842         if (totalTime >= maxSingleWaitTime)
843             break;
844         else
845             workSize *= 2;
846     }
847 
848     // "wait"
849     while (deGetMicroseconds() < endTime)
850         unused = busyworkCalculation(unused, workSize);
851 
852     *tcu::warmupCPUInternal::g_unused.m_v = unused;
853 }
854 
855 // Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1]
856 template <typename T>
linearSample(const std::vector<T> & values,float position)857 static float linearSample(const std::vector<T> &values, float position)
858 {
859     DE_ASSERT(position >= 0.0f);
860     DE_ASSERT(position <= 1.0f);
861 
862     const float floatNdx            = (float)(values.size() - 1) * position;
863     const int lowerNdx              = (int)deFloatFloor(floatNdx);
864     const int higherNdx             = lowerNdx + 1;
865     const float interpolationFactor = floatNdx - (float)lowerNdx;
866 
867     DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size());
868     DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size());
869     DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f);
870 
871     return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor);
872 }
873 
874 template <typename T>
calculateSingleOperationStatistics(const std::vector<T> & samples,uint64_t T::SampleType::* target)875 SingleOperationStatistics calculateSingleOperationStatistics(const std::vector<T> &samples,
876                                                              uint64_t T::SampleType::*target)
877 {
878     SingleOperationStatistics stats;
879     std::vector<uint64_t> values(samples.size());
880 
881     for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
882         values[ndx] = samples[ndx].duration.*target;
883 
884     std::sort(values.begin(), values.end());
885 
886     stats.minTime        = (float)values.front();
887     stats.maxTime        = (float)values.back();
888     stats.medianTime     = linearSample(values, 0.5f);
889     stats.min2DecileTime = linearSample(values, 0.1f);
890     stats.max9DecileTime = linearSample(values, 0.9f);
891 
892     return stats;
893 }
894 
895 template <typename StatisticsType, typename SampleType>
calculateBasicStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples,int SampleType::* predictor)896 void calculateBasicStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit,
897                               const std::vector<SampleType> &samples, int SampleType::*predictor)
898 {
899     std::vector<uint64_t> values(samples.size());
900 
901     for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
902         values[ndx] = samples[ndx].duration.fitResponseDuration;
903 
904     // median rate
905     {
906         std::vector<float> processingRates(samples.size());
907 
908         for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
909         {
910             const float timeInSeconds = (float)values[ndx] / 1000.0f / 1000.0f;
911             processingRates[ndx]      = (float)(samples[ndx].*predictor) / timeInSeconds;
912         }
913 
914         std::sort(processingRates.begin(), processingRates.end());
915 
916         stats.medianRate = linearSample(processingRates, 0.5f);
917     }
918 
919     // results compared to the approximation
920     {
921         std::vector<float> timeDiffs(samples.size());
922 
923         for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
924         {
925             const float prediction = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
926             const float actual     = (float)values[ndx];
927             timeDiffs[ndx]         = actual - prediction;
928         }
929         std::sort(timeDiffs.begin(), timeDiffs.end());
930 
931         stats.maxDiffTime        = timeDiffs.back();
932         stats.maxDiff9DecileTime = linearSample(timeDiffs, 0.9f);
933         stats.medianDiffTime     = linearSample(timeDiffs, 0.5f);
934     }
935 
936     // relative comparison to the approximation
937     {
938         std::vector<float> relativeDiffs(samples.size());
939 
940         for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
941         {
942             const float prediction = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
943             const float actual     = (float)values[ndx];
944 
945             // Ignore cases where we predict negative times, or if
946             // ratio would be (nearly) infinite: ignore if predicted
947             // time is less than 1 microsecond
948             if (prediction < 1.0f)
949                 relativeDiffs[ndx] = 0.0f;
950             else
951                 relativeDiffs[ndx] = (actual - prediction) / prediction;
952         }
953         std::sort(relativeDiffs.begin(), relativeDiffs.end());
954 
955         stats.maxRelDiffTime        = relativeDiffs.back();
956         stats.max9DecileRelDiffTime = linearSample(relativeDiffs, 0.9f);
957         stats.medianRelDiffTime     = linearSample(relativeDiffs, 0.5f);
958     }
959 
960     // values calculated using sorted timings
961 
962     std::sort(values.begin(), values.end());
963 
964     stats.result.minTime        = (float)values.front();
965     stats.result.maxTime        = (float)values.back();
966     stats.result.medianTime     = linearSample(values, 0.5f);
967     stats.result.min2DecileTime = linearSample(values, 0.1f);
968     stats.result.max9DecileTime = linearSample(values, 0.9f);
969 }
970 
971 template <typename StatisticsType, typename SampleType>
calculateBasicTransferStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples)972 void calculateBasicTransferStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit,
973                                       const std::vector<SampleType> &samples)
974 {
975     calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize);
976 }
977 
978 template <typename StatisticsType, typename SampleType>
calculateBasicRenderStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples)979 void calculateBasicRenderStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit,
980                                     const std::vector<SampleType> &samples)
981 {
982     calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize);
983 }
984 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<SingleOperationDuration>> & samples)985 static SingleCallStatistics calculateSampleStatistics(
986     const LineParametersWithConfidence &fit, const std::vector<UploadSampleResult<SingleOperationDuration>> &samples)
987 {
988     SingleCallStatistics stats;
989 
990     calculateBasicTransferStatistics(stats, fit, samples);
991 
992     return stats;
993 }
994 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeDuration>> & samples)995 static MapCallStatistics calculateSampleStatistics(
996     const LineParametersWithConfidence &fit, const std::vector<UploadSampleResult<MapBufferRangeDuration>> &samples)
997 {
998     MapCallStatistics stats;
999 
1000     calculateBasicTransferStatistics(stats, fit, samples);
1001 
1002     stats.map   = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration);
1003     stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration);
1004     stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration);
1005     stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration);
1006 
1007     return stats;
1008 }
1009 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> & samples)1010 static MapFlushCallStatistics calculateSampleStatistics(
1011     const LineParametersWithConfidence &fit,
1012     const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> &samples)
1013 {
1014     MapFlushCallStatistics stats;
1015 
1016     calculateBasicTransferStatistics(stats, fit, samples);
1017 
1018     stats.map   = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration);
1019     stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration);
1020     stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration);
1021     stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration);
1022     stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration);
1023 
1024     return stats;
1025 }
1026 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> & samples)1027 static MapCallStatistics calculateSampleStatistics(
1028     const LineParametersWithConfidence &fit,
1029     const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> &samples)
1030 {
1031     MapCallStatistics stats;
1032 
1033     calculateBasicTransferStatistics(stats, fit, samples);
1034 
1035     stats.map   = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration);
1036     stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration);
1037     stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration);
1038 
1039     return stats;
1040 }
1041 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> & samples)1042 static MapFlushCallStatistics calculateSampleStatistics(
1043     const LineParametersWithConfidence &fit,
1044     const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> &samples)
1045 {
1046     MapFlushCallStatistics stats;
1047 
1048     calculateBasicTransferStatistics(stats, fit, samples);
1049 
1050     stats.map   = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration);
1051     stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration);
1052     stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration);
1053     stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration);
1054 
1055     return stats;
1056 }
1057 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<RenderReadDuration>> & samples)1058 static RenderReadStatistics calculateSampleStatistics(
1059     const LineParametersWithConfidence &fit, const std::vector<RenderSampleResult<RenderReadDuration>> &samples)
1060 {
1061     RenderReadStatistics stats;
1062 
1063     calculateBasicRenderStatistics(stats, fit, samples);
1064 
1065     stats.render = calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration);
1066     stats.read   = calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration);
1067     stats.total  = calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration);
1068 
1069     return stats;
1070 }
1071 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> & samples)1072 static RenderReadStatistics calculateSampleStatistics(
1073     const LineParametersWithConfidence &fit,
1074     const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> &samples)
1075 {
1076     RenderReadStatistics stats;
1077 
1078     calculateBasicRenderStatistics(stats, fit, samples);
1079 
1080     stats.render = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration);
1081     stats.read   = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration);
1082     stats.total  = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration);
1083 
1084     return stats;
1085 }
1086 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UploadRenderReadDuration>> & samples)1087 static UploadRenderReadStatistics calculateSampleStatistics(
1088     const LineParametersWithConfidence &fit, const std::vector<RenderSampleResult<UploadRenderReadDuration>> &samples)
1089 {
1090     UploadRenderReadStatistics stats;
1091 
1092     calculateBasicRenderStatistics(stats, fit, samples);
1093 
1094     stats.upload = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration);
1095     stats.render = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration);
1096     stats.read   = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration);
1097     stats.total  = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration);
1098 
1099     return stats;
1100 }
1101 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> & samples)1102 static UploadRenderReadStatistics calculateSampleStatistics(
1103     const LineParametersWithConfidence &fit,
1104     const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> &samples)
1105 {
1106     UploadRenderReadStatistics stats;
1107 
1108     calculateBasicRenderStatistics(stats, fit, samples);
1109 
1110     stats.upload =
1111         calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration);
1112     stats.render =
1113         calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration);
1114     stats.read =
1115         calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration);
1116     stats.total =
1117         calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration);
1118 
1119     return stats;
1120 }
1121 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> & samples)1122 static RenderUploadRenderReadStatistics calculateSampleStatistics(
1123     const LineParametersWithConfidence &fit,
1124     const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> &samples)
1125 {
1126     RenderUploadRenderReadStatistics stats;
1127 
1128     calculateBasicRenderStatistics(stats, fit, samples);
1129 
1130     stats.firstRender =
1131         calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration);
1132     stats.upload = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration);
1133     stats.secondRender =
1134         calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration);
1135     stats.read  = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration);
1136     stats.total = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration);
1137 
1138     return stats;
1139 }
1140 
1141 template <typename DurationType>
fitLineToSamples(const std::vector<UploadSampleResult<DurationType>> & samples,int beginNdx,int endNdx,int step,uint64_t DurationType::* target=& DurationType::fitResponseDuration)1142 static LineParametersWithConfidence fitLineToSamples(
1143     const std::vector<UploadSampleResult<DurationType>> &samples, int beginNdx, int endNdx, int step,
1144     uint64_t DurationType::*target = &DurationType::fitResponseDuration)
1145 {
1146     std::vector<tcu::Vec2> samplePoints;
1147 
1148     for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
1149     {
1150         tcu::Vec2 point;
1151 
1152         point.x() = (float)(samples[sampleNdx].writtenSize);
1153         point.y() = (float)(samples[sampleNdx].duration.*target);
1154 
1155         samplePoints.push_back(point);
1156     }
1157 
1158     return theilSenSiegelLinearRegression(samplePoints, 0.6f);
1159 }
1160 
1161 template <typename DurationType>
fitLineToSamples(const std::vector<RenderSampleResult<DurationType>> & samples,int beginNdx,int endNdx,int step,uint64_t DurationType::* target=& DurationType::fitResponseDuration)1162 static LineParametersWithConfidence fitLineToSamples(
1163     const std::vector<RenderSampleResult<DurationType>> &samples, int beginNdx, int endNdx, int step,
1164     uint64_t DurationType::*target = &DurationType::fitResponseDuration)
1165 {
1166     std::vector<tcu::Vec2> samplePoints;
1167 
1168     for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
1169     {
1170         tcu::Vec2 point;
1171 
1172         point.x() = (float)(samples[sampleNdx].renderDataSize);
1173         point.y() = (float)(samples[sampleNdx].duration.*target);
1174 
1175         samplePoints.push_back(point);
1176     }
1177 
1178     return theilSenSiegelLinearRegression(samplePoints, 0.6f);
1179 }
1180 
1181 template <typename T>
fitLineToSamples(const std::vector<T> & samples,int beginNdx,int endNdx,uint64_t T::SampleType::* target=& T::SampleType::fitResponseDuration)1182 static LineParametersWithConfidence fitLineToSamples(
1183     const std::vector<T> &samples, int beginNdx, int endNdx,
1184     uint64_t T::SampleType::*target = &T::SampleType::fitResponseDuration)
1185 {
1186     return fitLineToSamples(samples, beginNdx, endNdx, 1, target);
1187 }
1188 
1189 template <typename T>
fitLineToSamples(const std::vector<T> & samples,uint64_t T::SampleType::* target=& T::SampleType::fitResponseDuration)1190 static LineParametersWithConfidence fitLineToSamples(
1191     const std::vector<T> &samples, uint64_t T::SampleType::*target = &T::SampleType::fitResponseDuration)
1192 {
1193     return fitLineToSamples(samples, 0, (int)samples.size(), target);
1194 }
1195 
// Returns the area enclosed between two lines over the x range [xmin, xmax].
//
// Line A is y = lineAOffset + lineACoefficient * x and line B is
// y = lineBOffset + lineBCoefficient * x.
//
// - If A is above B at both ends, or at neither end, the enclosed region is a
//   trapezoid: its area is the range width times the vertical gap at the midpoint.
// - Otherwise the lines cross inside the range and the region is two triangles
//   meeting at the crossing point. Their heights are the vertical gaps at xmin
//   and xmax, and their bases split the range width in proportion to those gaps.
//
// The crossing case is computed from the end gaps and the width instead of
// dividing by the slope difference. That is the same area mathematically, but it
// does not need an absolute epsilon on the slope difference, which would discard
// the area of lines whose slopes are small in absolute terms.
static float getAreaBetweenLines(float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset,
                                 float lineBCoefficient)
{
    const float lineAMin     = lineAOffset + lineACoefficient * xmin;
    const float lineAMax     = lineAOffset + lineACoefficient * xmax;
    const float lineBMin     = lineBOffset + lineBCoefficient * xmin;
    const float lineBMax     = lineBOffset + lineBCoefficient * xmax;
    const bool aOverBAtBegin = (lineAMin > lineBMin);
    const bool aOverBAtEnd   = (lineAMax > lineBMax);
    const float width        = (xmax - xmin);

    if (aOverBAtBegin == aOverBAtEnd)
    {
        // lines do not intersect

        const float midpoint = (xmin + xmax) / 2.0f;

        const float lineAHeight = lineAOffset + lineACoefficient * midpoint;
        const float lineBHeight = lineBOffset + lineBCoefficient * midpoint;
        const float gap         = lineAHeight - lineBHeight;

        return width * ((gap < 0.0f) ? (-gap) : (gap));
    }
    else
    {
        // lines intersect

        const float leftGap     = lineAMin - lineBMin;
        const float rightGap    = lineAMax - lineBMax;
        const float leftHeight  = (leftGap < 0.0f) ? (-leftGap) : (leftGap);
        const float rightHeight = (rightGap < 0.0f) ? (-rightGap) : (rightGap);
        const float heightSum   = leftHeight + rightHeight;

        // Degenerate (or NaN) gaps enclose no measurable area.
        if (!(heightSum > 0.0f))
            return 0.0f;

        // Triangle bases are width * (height / heightSum); dividing before multiplying
        // keeps the intermediate values small.
        return 0.5f * width *
               (leftHeight * (leftHeight / heightSum) + rightHeight * (rightHeight / heightSum));
    }
}
1235 
1236 template <typename T>
calculateSampleFitLinearity(const std::vector<T> & samples,int T::* predictor)1237 static float calculateSampleFitLinearity(const std::vector<T> &samples, int T::*predictor)
1238 {
1239     // Compare the fitted line of first half of the samples to the fitted line of
1240     // the second half of the samples. Calculate a AABB that fully contains every
1241     // sample's x component and both fit lines in this range. Calculate the ratio
1242     // of the area between the lines and the AABB.
1243 
1244     const float epsilon = 1.e-6f;
1245     const int midPoint  = (int)samples.size() / 2;
1246     const LineParametersWithConfidence startApproximation =
1247         fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration);
1248     const LineParametersWithConfidence endApproximation =
1249         fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration);
1250 
1251     const float aabbMinX = (float)(samples.front().*predictor);
1252     const float aabbMinY = de::min(startApproximation.offset + startApproximation.coefficient * aabbMinX,
1253                                    endApproximation.offset + endApproximation.coefficient * aabbMinX);
1254     const float aabbMaxX = (float)(samples.back().*predictor);
1255     const float aabbMaxY = de::max(startApproximation.offset + startApproximation.coefficient * aabbMaxX,
1256                                    endApproximation.offset + endApproximation.coefficient * aabbMaxX);
1257 
1258     const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1259     const float areaBetweenLines =
1260         getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient,
1261                             endApproximation.offset, endApproximation.coefficient);
1262     const float errorAreaRatio = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1263 
1264     return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1265 }
1266 
1267 template <typename DurationType>
calculateSampleFitLinearity(const std::vector<UploadSampleResult<DurationType>> & samples)1268 static float calculateSampleFitLinearity(const std::vector<UploadSampleResult<DurationType>> &samples)
1269 {
1270     return calculateSampleFitLinearity(samples, &UploadSampleResult<DurationType>::writtenSize);
1271 }
1272 
1273 template <typename DurationType>
calculateSampleFitLinearity(const std::vector<RenderSampleResult<DurationType>> & samples)1274 static float calculateSampleFitLinearity(const std::vector<RenderSampleResult<DurationType>> &samples)
1275 {
1276     return calculateSampleFitLinearity(samples, &RenderSampleResult<DurationType>::renderDataSize);
1277 }
1278 
1279 template <typename T>
calculateSampleTemporalStability(const std::vector<T> & samples,int T::* predictor)1280 static float calculateSampleTemporalStability(const std::vector<T> &samples, int T::*predictor)
1281 {
1282     // Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order)
1283     // Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully
1284     // contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between
1285     // the lines and the AABB.
1286 
1287     const float epsilon = 1.e-6f;
1288     const LineParametersWithConfidence evenApproximation =
1289         fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1290     const LineParametersWithConfidence oddApproximation =
1291         fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1292 
1293     const float aabbMinX = (float)(samples.front().*predictor);
1294     const float aabbMinY = de::min(evenApproximation.offset + evenApproximation.coefficient * aabbMinX,
1295                                    oddApproximation.offset + oddApproximation.coefficient * aabbMinX);
1296     const float aabbMaxX = (float)(samples.back().*predictor);
1297     const float aabbMaxY = de::max(evenApproximation.offset + evenApproximation.coefficient * aabbMaxX,
1298                                    oddApproximation.offset + oddApproximation.coefficient * aabbMaxX);
1299 
1300     const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1301     const float areaBetweenLines =
1302         getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient,
1303                             oddApproximation.offset, oddApproximation.coefficient);
1304     const float errorAreaRatio = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1305 
1306     return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1307 }
1308 
1309 template <typename DurationType>
calculateSampleTemporalStability(const std::vector<UploadSampleResult<DurationType>> & samples)1310 static float calculateSampleTemporalStability(const std::vector<UploadSampleResult<DurationType>> &samples)
1311 {
1312     return calculateSampleTemporalStability(samples, &UploadSampleResult<DurationType>::writtenSize);
1313 }
1314 
1315 template <typename DurationType>
calculateSampleTemporalStability(const std::vector<RenderSampleResult<DurationType>> & samples)1316 static float calculateSampleTemporalStability(const std::vector<RenderSampleResult<DurationType>> &samples)
1317 {
1318     return calculateSampleTemporalStability(samples, &RenderSampleResult<DurationType>::renderDataSize);
1319 }
1320 
1321 template <typename DurationType>
bucketizeSamplesUniformly(const std::vector<UploadSampleResult<DurationType>> & samples,std::vector<UploadSampleResult<DurationType>> * buckets,int numBuckets,int & minBufferSize,int & maxBufferSize)1322 static void bucketizeSamplesUniformly(const std::vector<UploadSampleResult<DurationType>> &samples,
1323                                       std::vector<UploadSampleResult<DurationType>> *buckets, int numBuckets,
1324                                       int &minBufferSize, int &maxBufferSize)
1325 {
1326     minBufferSize = 0;
1327     maxBufferSize = 0;
1328 
1329     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1330     {
1331         DE_ASSERT(samples[sampleNdx].allocatedSize != 0);
1332 
1333         if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize)
1334             minBufferSize = samples[sampleNdx].allocatedSize;
1335         if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize)
1336             maxBufferSize = samples[sampleNdx].allocatedSize;
1337     }
1338 
1339     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1340     {
1341         const float bucketNdxFloat = (float)(samples[sampleNdx].allocatedSize - minBufferSize) /
1342                                      (float)(maxBufferSize - minBufferSize) * (float)numBuckets;
1343         const int bucketNdx = de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets - 1);
1344 
1345         buckets[bucketNdx].push_back(samples[sampleNdx]);
1346     }
1347 }
1348 
1349 template <typename SampleType>
logMapRangeStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1350 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats(
1351     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1352 {
1353     log << tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime)
1354         << tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime)
1355         << tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME,
1356                                stats.map.min2DecileTime)
1357         << tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME,
1358                                stats.map.max9DecileTime)
1359         << tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1360 }
1361 
1362 template <typename SampleType>
logUnmapStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1363 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats(
1364     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1365 {
1366     log << tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime)
1367         << tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime)
1368         << tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime)
1369         << tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime)
1370         << tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1371 }
1372 
1373 template <typename SampleType>
logWriteStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1374 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats(
1375     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1376 {
1377     log << tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime)
1378         << tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime)
1379         << tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime)
1380         << tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime)
1381         << tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1382 }
1383 
1384 template <typename SampleType>
logFlushStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1385 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats(
1386     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1387 {
1388     log << tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime)
1389         << tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime)
1390         << tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime)
1391         << tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime)
1392         << tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1393 }
1394 
1395 template <typename SampleType>
logAllocStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1396 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats(
1397     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1398 {
1399     log << tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime)
1400         << tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime)
1401         << tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime)
1402         << tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime)
1403         << tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1404 }
1405 
1406 template <typename SampleType>
logMapRangeStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1407 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapRangeStats(
1408     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1409 {
1410     DE_UNREF(log);
1411     DE_UNREF(stats);
1412 }
1413 
1414 template <typename SampleType>
logUnmapStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1415 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapStats(
1416     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1417 {
1418     DE_UNREF(log);
1419     DE_UNREF(stats);
1420 }
1421 
1422 template <typename SampleType>
logWriteStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1423 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteStats(
1424     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1425 {
1426     DE_UNREF(log);
1427     DE_UNREF(stats);
1428 }
1429 
1430 template <typename SampleType>
logFlushStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1431 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushStats(
1432     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1433 {
1434     DE_UNREF(log);
1435     DE_UNREF(stats);
1436 }
1437 
1438 template <typename SampleType>
logAllocStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1439 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocStats(
1440     tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
1441 {
1442     DE_UNREF(log);
1443     DE_UNREF(stats);
1444 }
1445 
1446 template <typename SampleType>
logMapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1447 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution(
1448     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1449     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1450 {
1451     const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::mapDuration);
1452     log << tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1453                                contributionFitting.offset)
1454         << tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1455                                contributionFitting.coefficient * 1024.0f * 1024.0f)
1456         << tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1457 }
1458 
1459 template <typename SampleType>
logUnmapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1460 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution(
1461     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1462     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1463 {
1464     const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration);
1465     log << tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1466                                contributionFitting.offset)
1467         << tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1468                                contributionFitting.coefficient * 1024.0f * 1024.0f)
1469         << tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1470 }
1471 
1472 template <typename SampleType>
logWriteContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1473 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution(
1474     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1475     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1476 {
1477     const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration);
1478     log << tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1479                                contributionFitting.offset)
1480         << tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1481                                contributionFitting.coefficient * 1024.0f * 1024.0f)
1482         << tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1483 }
1484 
1485 template <typename SampleType>
logFlushContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1486 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution(
1487     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1488     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1489 {
1490     const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration);
1491     log << tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1492                                contributionFitting.offset)
1493         << tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1494                                contributionFitting.coefficient * 1024.0f * 1024.0f)
1495         << tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1496 }
1497 
1498 template <typename SampleType>
logAllocContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1499 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution(
1500     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1501     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1502 {
1503     const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration);
1504     log << tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1505                                contributionFitting.offset)
1506         << tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1507                                contributionFitting.coefficient * 1024.0f * 1024.0f)
1508         << tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1509 }
1510 
1511 template <typename SampleType>
logRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1512 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution(
1513     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1514     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1515 {
1516     const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration);
1517     log << tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1518                                contributionFitting.offset)
1519         << tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1520                                contributionFitting.coefficient * 1024.0f * 1024.0f)
1521         << tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME,
1522                                stats.render.medianTime);
1523 }
1524 
1525 template <typename SampleType>
logReadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1526 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution(
1527     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1528     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1529 {
1530     const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration);
1531     log << tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1532                                contributionFitting.offset)
1533         << tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1534                                contributionFitting.coefficient * 1024.0f * 1024.0f)
1535         << tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime);
1536 }
1537 
1538 template <typename SampleType>
logUploadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1539 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution(
1540     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1541     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1542 {
1543     const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration);
1544     log << tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1545                                contributionFitting.offset)
1546         << tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1547                                contributionFitting.coefficient * 1024.0f * 1024.0f)
1548         << tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME,
1549                                stats.upload.medianTime);
1550 }
1551 
1552 template <typename SampleType>
logTotalContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1553 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution(
1554     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1555     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1556 {
1557     const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration);
1558     log << tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME,
1559                                contributionFitting.offset)
1560         << tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
1561                                contributionFitting.coefficient * 1024.0f * 1024.0f)
1562         << tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime);
1563 }
1564 
1565 template <typename SampleType>
logFirstRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1566 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution(
1567     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1568     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1569 {
1570     const LineParametersWithConfidence contributionFitting =
1571         fitLineToSamples(samples, &SampleType::firstRenderDuration);
1572     log << tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us",
1573                                QP_KEY_TAG_TIME, contributionFitting.offset)
1574         << tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB",
1575                                QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1576         << tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME,
1577                                stats.firstRender.medianTime);
1578 }
1579 
1580 template <typename SampleType>
logSecondRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1581 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution(
1582     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1583     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1584 {
1585     const LineParametersWithConfidence contributionFitting =
1586         fitLineToSamples(samples, &SampleType::secondRenderDuration);
1587     log << tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us",
1588                                QP_KEY_TAG_TIME, contributionFitting.offset)
1589         << tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB",
1590                                QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1591         << tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME,
1592                                stats.secondRender.medianTime);
1593 }
1594 
1595 template <typename SampleType>
logMapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1596 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapContribution(
1597     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1598     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1599 {
1600     DE_UNREF(log);
1601     DE_UNREF(samples);
1602     DE_UNREF(stats);
1603 }
1604 
1605 template <typename SampleType>
logUnmapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1606 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapContribution(
1607     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1608     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1609 {
1610     DE_UNREF(log);
1611     DE_UNREF(samples);
1612     DE_UNREF(stats);
1613 }
1614 
1615 template <typename SampleType>
logWriteContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1616 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteContribution(
1617     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1618     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1619 {
1620     DE_UNREF(log);
1621     DE_UNREF(samples);
1622     DE_UNREF(stats);
1623 }
1624 
1625 template <typename SampleType>
logFlushContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1626 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushContribution(
1627     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1628     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1629 {
1630     DE_UNREF(log);
1631     DE_UNREF(samples);
1632     DE_UNREF(stats);
1633 }
1634 
1635 template <typename SampleType>
logAllocContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1636 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocContribution(
1637     tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
1638     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1639 {
1640     DE_UNREF(log);
1641     DE_UNREF(samples);
1642     DE_UNREF(stats);
1643 }
1644 
1645 template <typename SampleType>
logRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1646 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Value>::Type logRenderContribution(
1647     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1648     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1649 {
1650     DE_UNREF(log);
1651     DE_UNREF(samples);
1652     DE_UNREF(stats);
1653 }
1654 
1655 template <typename SampleType>
logReadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1656 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_READ_STATS>::Value>::Type logReadContribution(
1657     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1658     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1659 {
1660     DE_UNREF(log);
1661     DE_UNREF(samples);
1662     DE_UNREF(stats);
1663 }
1664 
1665 template <typename SampleType>
logUploadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1666 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Value>::Type logUploadContribution(
1667     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1668     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1669 {
1670     DE_UNREF(log);
1671     DE_UNREF(samples);
1672     DE_UNREF(stats);
1673 }
1674 
1675 template <typename SampleType>
logTotalContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1676 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Value>::Type logTotalContribution(
1677     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1678     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1679 {
1680     DE_UNREF(log);
1681     DE_UNREF(samples);
1682     DE_UNREF(stats);
1683 }
1684 
1685 template <typename SampleType>
logFirstRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1686 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Value>::Type logFirstRenderContribution(
1687     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1688     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1689 {
1690     DE_UNREF(log);
1691     DE_UNREF(samples);
1692     DE_UNREF(stats);
1693 }
1694 
1695 template <typename SampleType>
logSecondRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1696 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Value>::Type logSecondRenderContribution(
1697     tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
1698     const typename SampleTypeTraits<SampleType>::StatsType &stats)
1699 {
1700     DE_UNREF(log);
1701     DE_UNREF(samples);
1702     DE_UNREF(stats);
1703 }
1704 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<SingleOperationDuration>> & samples)1705 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1706                    const std::vector<UploadSampleResult<SingleOperationDuration>> &samples)
1707 {
1708     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1709         << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1710         << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1711         << tcu::TestLog::ValueInfo("UploadTime", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1712         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1713         << tcu::TestLog::EndSampleInfo;
1714 
1715     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1716     {
1717         const float fitResidual =
1718             (float)samples[sampleNdx].duration.fitResponseDuration -
1719             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1720         log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1721             << (int)samples[sampleNdx].duration.totalDuration << fitResidual << tcu::TestLog::EndSample;
1722     }
1723 
1724     log << tcu::TestLog::EndSampleList;
1725 }
1726 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeDuration>> & samples)1727 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1728                    const std::vector<UploadSampleResult<MapBufferRangeDuration>> &samples)
1729 {
1730     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1731         << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1732         << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1733         << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1734         << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1735         << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1736         << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1737         << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1738         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1739         << tcu::TestLog::EndSampleInfo;
1740 
1741     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1742     {
1743         const float fitResidual =
1744             (float)samples[sampleNdx].duration.fitResponseDuration -
1745             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1746         log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1747             << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.allocDuration
1748             << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration
1749             << (int)samples[sampleNdx].duration.writeDuration << fitResidual << tcu::TestLog::EndSample;
1750     }
1751 
1752     log << tcu::TestLog::EndSampleList;
1753 }
1754 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> & samples)1755 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1756                    const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> &samples)
1757 {
1758     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1759         << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1760         << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1761         << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1762         << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1763         << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1764         << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1765         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1766         << tcu::TestLog::EndSampleInfo;
1767 
1768     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1769     {
1770         const float fitResidual =
1771             (float)samples[sampleNdx].duration.fitResponseDuration -
1772             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1773         log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1774             << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.mapDuration
1775             << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration
1776             << fitResidual << tcu::TestLog::EndSample;
1777     }
1778 
1779     log << tcu::TestLog::EndSampleList;
1780 }
1781 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> & samples)1782 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1783                    const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> &samples)
1784 {
1785     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1786         << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1787         << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1788         << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1789         << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1790         << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1791         << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1792         << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1793         << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1794         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1795         << tcu::TestLog::EndSampleInfo;
1796 
1797     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1798     {
1799         const float fitResidual =
1800             (float)samples[sampleNdx].duration.fitResponseDuration -
1801             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1802         log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1803             << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.allocDuration
1804             << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration
1805             << (int)samples[sampleNdx].duration.writeDuration << (int)samples[sampleNdx].duration.flushDuration
1806             << fitResidual << tcu::TestLog::EndSample;
1807     }
1808 
1809     log << tcu::TestLog::EndSampleList;
1810 }
1811 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> & samples)1812 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1813                    const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> &samples)
1814 {
1815     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1816         << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1817         << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1818         << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1819         << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1820         << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1821         << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1822         << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1823         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1824         << tcu::TestLog::EndSampleInfo;
1825 
1826     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1827     {
1828         const float fitResidual =
1829             (float)samples[sampleNdx].duration.fitResponseDuration -
1830             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
1831         log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
1832             << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.mapDuration
1833             << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration
1834             << (int)samples[sampleNdx].duration.flushDuration << fitResidual << tcu::TestLog::EndSample;
1835     }
1836 
1837     log << tcu::TestLog::EndSampleList;
1838 }
1839 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<RenderReadDuration>> & samples)1840 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1841                    const std::vector<RenderSampleResult<RenderReadDuration>> &samples)
1842 {
1843     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1844         << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1845         << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1846         << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1847         << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1848         << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1849         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1850         << tcu::TestLog::EndSampleInfo;
1851 
1852     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1853     {
1854         const float fitResidual =
1855             (float)samples[sampleNdx].duration.fitResponseDuration -
1856             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1857         log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].numVertices
1858             << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.renderDuration
1859             << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample;
1860     }
1861 
1862     log << tcu::TestLog::EndSampleList;
1863 }
1864 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> & samples)1865 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1866                    const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> &samples)
1867 {
1868     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1869         << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1870         << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1871         << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes",
1872                                    QP_SAMPLE_VALUE_TAG_PREDICTOR)
1873         << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1874         << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1875         << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1876         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1877         << tcu::TestLog::EndSampleInfo;
1878 
1879     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1880     {
1881         const float fitResidual =
1882             (float)samples[sampleNdx].duration.fitResponseDuration -
1883             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1884         log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].numVertices
1885             << samples[sampleNdx].unrelatedDataSize << (int)samples[sampleNdx].duration.renderReadDuration
1886             << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration
1887             << fitResidual << tcu::TestLog::EndSample;
1888     }
1889 
1890     log << tcu::TestLog::EndSampleList;
1891 }
1892 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UploadRenderReadDuration>> & samples)1893 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1894                    const std::vector<RenderSampleResult<UploadRenderReadDuration>> &samples)
1895 {
1896     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1897         << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1898         << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1899         << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1900         << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1901         << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1902         << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1903         << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1904         << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1905         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1906         << tcu::TestLog::EndSampleInfo;
1907 
1908     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1909     {
1910         const float fitResidual =
1911             (float)samples[sampleNdx].duration.fitResponseDuration -
1912             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1913         log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize
1914             << samples[sampleNdx].numVertices << (int)samples[sampleNdx].duration.renderReadDuration
1915             << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.uploadDuration
1916             << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration
1917             << fitResidual << tcu::TestLog::EndSample;
1918     }
1919 
1920     log << tcu::TestLog::EndSampleList;
1921 }
1922 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> & samples)1923 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1924                    const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> &samples)
1925 {
1926     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1927         << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1928         << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1929         << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1930         << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes",
1931                                    QP_SAMPLE_VALUE_TAG_PREDICTOR)
1932         << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1933         << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1934         << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1935         << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1936         << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1937         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1938         << tcu::TestLog::EndSampleInfo;
1939 
1940     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1941     {
1942         const float fitResidual =
1943             (float)samples[sampleNdx].duration.fitResponseDuration -
1944             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1945         log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize
1946             << samples[sampleNdx].numVertices << samples[sampleNdx].unrelatedDataSize
1947             << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.totalDuration
1948             << (int)samples[sampleNdx].duration.uploadDuration << (int)samples[sampleNdx].duration.renderDuration
1949             << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample;
1950     }
1951 
1952     log << tcu::TestLog::EndSampleList;
1953 }
1954 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> & samples)1955 void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
1956                    const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> &samples)
1957 {
1958     log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
1959         << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1960         << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1961         << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1962         << tcu::TestLog::ValueInfo("DrawReadTime", "Second draw call and ReadPixels time", "us",
1963                                    QP_SAMPLE_VALUE_TAG_RESPONSE)
1964         << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1965         << tcu::TestLog::ValueInfo("FirstDrawCallTime", "First draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1966         << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1967         << tcu::TestLog::ValueInfo("SecondDrawCallTime", "Second draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1968         << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1969         << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1970         << tcu::TestLog::EndSampleInfo;
1971 
1972     for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1973     {
1974         const float fitResidual =
1975             (float)samples[sampleNdx].duration.fitResponseDuration -
1976             (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
1977         log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize
1978             << samples[sampleNdx].numVertices << (int)samples[sampleNdx].duration.renderReadDuration
1979             << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.firstRenderDuration
1980             << (int)samples[sampleNdx].duration.uploadDuration << (int)samples[sampleNdx].duration.secondRenderDuration
1981             << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample;
1982     }
1983 
1984     log << tcu::TestLog::EndSampleList;
1985 }
1986 
1987 template <typename SampleType>
analyzeSampleResults(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,bool logBucketPerformance)1988 static UploadSampleAnalyzeResult analyzeSampleResults(tcu::TestLog &log,
1989                                                       const std::vector<UploadSampleResult<SampleType>> &samples,
1990                                                       bool logBucketPerformance)
1991 {
1992     // Assume data is linear with some outliers, fit a line
1993     const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples);
1994     const typename SampleTypeTraits<SampleType>::StatsType resultStats =
1995         calculateSampleStatistics(theilSenFitting, samples);
1996     float approximatedTransferRate;
1997     float approximatedTransferRateNoConstant;
1998 
1999     // Output raw samples
2000     {
2001         const tcu::ScopedLogSection section(log, "Samples", "Samples");
2002         logSampleList(log, theilSenFitting, samples);
2003     }
2004 
2005     // Calculate results for different ranges
2006     if (logBucketPerformance)
2007     {
2008         const int numBuckets = 4;
2009         int minBufferSize    = 0;
2010         int maxBufferSize    = 0;
2011         std::vector<UploadSampleResult<SampleType>> buckets[numBuckets];
2012 
2013         bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize);
2014 
2015         for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx)
2016         {
2017             if (buckets[bucketNdx].empty())
2018                 continue;
2019 
2020             // Print a nice result summary
2021 
2022             const int bucketRangeMin =
2023                 minBufferSize + (int)(((float)bucketNdx / (float)numBuckets) * (float)(maxBufferSize - minBufferSize));
2024             const int bucketRangeMax = minBufferSize + (int)(((float)(bucketNdx + 1) / (float)numBuckets) *
2025                                                              (float)(maxBufferSize - minBufferSize));
2026             const typename SampleTypeTraits<SampleType>::StatsType stats =
2027                 calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]);
2028             const tcu::ScopedLogSection section(
2029                 log, "BufferSizeRange",
2030                 std::string("Transfer performance with buffer size in range [")
2031                     .append(getHumanReadableByteSize(bucketRangeMin)
2032                                 .append(", ")
2033                                 .append(getHumanReadableByteSize(bucketRangeMax).append("]"))));
2034 
2035             logMapRangeStats<SampleType>(log, stats);
2036             logUnmapStats<SampleType>(log, stats);
2037             logWriteStats<SampleType>(log, stats);
2038             logFlushStats<SampleType>(log, stats);
2039             logAllocStats<SampleType>(log, stats);
2040 
2041             log << tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime)
2042                 << tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime)
2043                 << tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME,
2044                                        stats.result.min2DecileTime)
2045                 << tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME,
2046                                        stats.result.max9DecileTime)
2047                 << tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime)
2048                 << tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE,
2049                                        stats.medianRate / 1024.0f / 1024.0f)
2050                 << tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME,
2051                                        stats.maxDiffTime)
2052                 << tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME,
2053                                        stats.maxDiff9DecileTime)
2054                 << tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME,
2055                                        stats.medianDiffTime)
2056                 << tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE,
2057                                        stats.maxRelDiffTime * 100.0f)
2058                 << tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%",
2059                                        QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f)
2060                 << tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%",
2061                                        QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f);
2062         }
2063     }
2064 
2065     // Contributions
2066     if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
2067     {
2068         const tcu::ScopedLogSection section(log, "Contribution", "Contributions");
2069 
2070         logMapContribution(log, samples, resultStats);
2071         logUnmapContribution(log, samples, resultStats);
2072         logWriteContribution(log, samples, resultStats);
2073         logFlushContribution(log, samples, resultStats);
2074         logAllocContribution(log, samples, resultStats);
2075     }
2076 
2077     // Print results
2078     {
2079         const tcu::ScopedLogSection section(log, "Results", "Results");
2080 
2081         const int medianBufferSize = (samples.front().bufferSize + samples.back().bufferSize) / 2;
2082         const float approximatedTransferTime =
2083             (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
2084         const float approximatedTransferTimeNoConstant =
2085             (theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
2086         const float sampleLinearity         = calculateSampleFitLinearity(samples);
2087         const float sampleTemporalStability = calculateSampleTemporalStability(samples);
2088 
2089         approximatedTransferRateNoConstant = (float)medianBufferSize / approximatedTransferTimeNoConstant;
2090         approximatedTransferRate           = (float)medianBufferSize / approximatedTransferTime;
2091 
2092         log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY,
2093                                    sampleLinearity * 100.0f)
2094             << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY,
2095                                    sampleTemporalStability * 100.0f)
2096             << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME,
2097                                    theilSenFitting.offset)
2098             << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower",
2099                                    "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME,
2100                                    theilSenFitting.offsetConfidenceLower)
2101             << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper",
2102                                    "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME,
2103                                    theilSenFitting.offsetConfidenceUpper)
2104             << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
2105                                    theilSenFitting.coefficient * 1024.0f * 1024.0f)
2106             << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower",
2107                                    "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME,
2108                                    theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
2109             << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper",
2110                                    "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME,
2111                                    theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
2112             << tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s",
2113                                    QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f)
2114             << tcu::TestLog::Float("ApproximatedTransferRateNoConstant",
2115                                    "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE,
2116                                    approximatedTransferRateNoConstant / 1024.0f / 1024.0f)
2117             << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME,
2118                                    resultStats.result.medianTime)
2119             << tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE,
2120                                    resultStats.medianRate / 1024.0f / 1024.0f);
2121     }
2122 
2123     // return approximated transfer rate
2124     {
2125         UploadSampleAnalyzeResult result;
2126 
2127         result.transferRateMedian     = resultStats.medianRate;
2128         result.transferRateAtRange    = approximatedTransferRate;
2129         result.transferRateAtInfinity = approximatedTransferRateNoConstant;
2130 
2131         return result;
2132     }
2133 }
2134 
2135 template <typename SampleType>
analyzeSampleResults(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples)2136 static RenderSampleAnalyzeResult analyzeSampleResults(tcu::TestLog &log,
2137                                                       const std::vector<RenderSampleResult<SampleType>> &samples)
2138 {
2139     // Assume data is linear with some outliers, fit a line
2140     const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples);
2141     const typename SampleTypeTraits<SampleType>::StatsType resultStats =
2142         calculateSampleStatistics(theilSenFitting, samples);
2143     float approximatedProcessingRate;
2144     float approximatedProcessingRateNoConstant;
2145 
2146     // output raw samples
2147     {
2148         const tcu::ScopedLogSection section(log, "Samples", "Samples");
2149         logSampleList(log, theilSenFitting, samples);
2150     }
2151 
2152     // Contributions
2153     if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
2154     {
2155         const tcu::ScopedLogSection section(log, "Contribution", "Contributions");
2156 
2157         logFirstRenderContribution(log, samples, resultStats);
2158         logUploadContribution(log, samples, resultStats);
2159         logRenderContribution(log, samples, resultStats);
2160         logSecondRenderContribution(log, samples, resultStats);
2161         logReadContribution(log, samples, resultStats);
2162         logTotalContribution(log, samples, resultStats);
2163     }
2164 
2165     // print results
2166     {
2167         const tcu::ScopedLogSection section(log, "Results", "Results");
2168 
2169         const int medianDataSize = (samples.front().renderDataSize + samples.back().renderDataSize) / 2;
2170         const float approximatedRenderTime =
2171             (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
2172         const float approximatedRenderTimeNoConstant =
2173             (theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
2174         const float sampleLinearity         = calculateSampleFitLinearity(samples);
2175         const float sampleTemporalStability = calculateSampleTemporalStability(samples);
2176 
2177         approximatedProcessingRateNoConstant = (float)medianDataSize / approximatedRenderTimeNoConstant;
2178         approximatedProcessingRate           = (float)medianDataSize / approximatedRenderTime;
2179 
2180         log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY,
2181                                    sampleLinearity * 100.0f)
2182             << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY,
2183                                    sampleTemporalStability * 100.0f)
2184             << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME,
2185                                    theilSenFitting.offset)
2186             << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower",
2187                                    "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME,
2188                                    theilSenFitting.offsetConfidenceLower)
2189             << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper",
2190                                    "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME,
2191                                    theilSenFitting.offsetConfidenceUpper)
2192             << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
2193                                    theilSenFitting.coefficient * 1024.0f * 1024.0f)
2194             << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower",
2195                                    "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME,
2196                                    theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
2197             << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper",
2198                                    "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME,
2199                                    theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
2200             << tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s",
2201                                    QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f)
2202             << tcu::TestLog::Float("ApproximatedProcessRateNoConstant",
2203                                    "Approximated processing rate without constant cost", "MB / s",
2204                                    QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f)
2205             << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME,
2206                                    resultStats.result.medianTime)
2207             << tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE,
2208                                    resultStats.medianRate / 1024.0f / 1024.0f);
2209     }
2210 
2211     // return approximated render rate
2212     {
2213         RenderSampleAnalyzeResult result;
2214 
2215         result.renderRateMedian     = resultStats.medianRate;
2216         result.renderRateAtRange    = approximatedProcessingRate;
2217         result.renderRateAtInfinity = approximatedProcessingRateNoConstant;
2218 
2219         return result;
2220     }
2221     return RenderSampleAnalyzeResult();
2222 }
2223 
generateTwoPassRandomIterationOrder(std::vector<int> & iterationOrder,int numSamples)2224 static void generateTwoPassRandomIterationOrder(std::vector<int> &iterationOrder, int numSamples)
2225 {
2226     de::Random rnd(0xabc);
2227     const int midPoint = (numSamples + 1) / 2; // !< ceil(m_numSamples / 2)
2228 
2229     DE_ASSERT((int)iterationOrder.size() == numSamples);
2230 
2231     // Two "passes" over range, randomize order in both passes
2232     // This allows to us detect if iterations are not independent
2233     // (first run and later run samples differ significantly?)
2234 
2235     for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx)
2236         iterationOrder[sampleNdx] = sampleNdx * 2;
2237     for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx)
2238         iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1;
2239 
2240     for (int ndx = 0; ndx < midPoint; ++ndx)
2241         std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]);
2242     for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx)
2243         std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size() - 1)]);
2244 }
2245 
2246 template <typename SampleType>
2247 class BasicBufferCase : public TestCase
2248 {
2249 public:
2250     enum Flags
2251     {
2252         FLAG_ALLOCATE_LARGER_BUFFER = 0x01,
2253     };
2254     BasicBufferCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax,
2255                     int numSamples, int flags);
2256     ~BasicBufferCase(void);
2257 
2258     virtual void init(void);
2259     virtual void deinit(void);
2260 
2261 protected:
2262     IterateResult iterate(void);
2263 
2264     virtual bool runSample(int iteration, UploadSampleResult<SampleType> &sample)                = 0;
2265     virtual void logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results) = 0;
2266 
2267     void disableGLWarmup(void);
2268     void waitGLResults(void);
2269 
2270     enum
2271     {
2272         UNUSED_RENDER_AREA_SIZE = 32
2273     };
2274 
2275     glu::ShaderProgram *m_minimalProgram;
2276     int32_t m_minimalProgramPosLoc;
2277     uint32_t m_bufferID;
2278 
2279     const int m_numSamples;
2280     const int m_bufferSizeMin;
2281     const int m_bufferSizeMax;
2282     const bool m_allocateLargerBuffer;
2283 
2284 private:
2285     int m_iteration;
2286     std::vector<int> m_iterationOrder;
2287     std::vector<UploadSampleResult<SampleType>> m_results;
2288 
2289     bool m_useGL;
2290     int m_bufferRandomizerTimer;
2291 };
2292 
2293 template <typename SampleType>
BasicBufferCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,int numSamples,int flags)2294 BasicBufferCase<SampleType>::BasicBufferCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
2295                                              int bufferSizeMax, int numSamples, int flags)
2296     : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, desc)
2297     , m_minimalProgram(DE_NULL)
2298     , m_minimalProgramPosLoc(-1)
2299     , m_bufferID(0)
2300     , m_numSamples(numSamples)
2301     , m_bufferSizeMin(bufferSizeMin)
2302     , m_bufferSizeMax(bufferSizeMax)
2303     , m_allocateLargerBuffer((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0)
2304     , m_iteration(0)
2305     , m_iterationOrder(numSamples)
2306     , m_results(numSamples)
2307     , m_useGL(true)
2308     , m_bufferRandomizerTimer(0)
2309 {
2310     // "randomize" iteration order. Deterministic, patternless
2311     generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);
2312 
2313     // choose buffer sizes
2314     for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx)
2315     {
2316         const int rawBufferSize =
2317             (int)deFloatFloor((float)bufferSizeMin +
2318                               (float)(bufferSizeMax - bufferSizeMin) * ((float)(sampleNdx + 1) / (float)m_numSamples));
2319         const int bufferSize = deAlign32(rawBufferSize, 16);
2320         const int allocatedBufferSize =
2321             deAlign32((m_allocateLargerBuffer) ? ((int)((float)bufferSize * 1.5f)) : (bufferSize), 16);
2322 
2323         m_results[sampleNdx].bufferSize    = bufferSize;
2324         m_results[sampleNdx].allocatedSize = allocatedBufferSize;
2325         m_results[sampleNdx].writtenSize   = -1;
2326     }
2327 }
2328 
2329 template <typename SampleType>
~BasicBufferCase(void)2330 BasicBufferCase<SampleType>::~BasicBufferCase(void)
2331 {
2332     deinit();
2333 }
2334 
2335 template <typename SampleType>
init(void)2336 void BasicBufferCase<SampleType>::init(void)
2337 {
2338     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2339 
2340     if (!m_useGL)
2341         return;
2342 
2343     // \note Viewport size is not checked, it won't matter if the render target actually is smaller than UNUSED_RENDER_AREA_SIZE
2344 
2345     // minimal shader
2346 
2347     m_minimalProgram = new glu::ShaderProgram(m_context.getRenderContext(),
2348                                               glu::ProgramSources() << glu::VertexSource(s_minimalVertexShader)
2349                                                                     << glu::FragmentSource(s_minimalFragnentShader));
2350     if (!m_minimalProgram->isOk())
2351     {
2352         m_testCtx.getLog() << *m_minimalProgram;
2353         throw tcu::TestError("failed to build shader program");
2354     }
2355 
2356     m_minimalProgramPosLoc = gl.getAttribLocation(m_minimalProgram->getProgram(), "a_position");
2357     if (m_minimalProgramPosLoc == -1)
2358         throw tcu::TestError("a_position location was -1");
2359 }
2360 
2361 template <typename SampleType>
deinit(void)2362 void BasicBufferCase<SampleType>::deinit(void)
2363 {
2364     if (m_bufferID)
2365     {
2366         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2367         m_bufferID = 0;
2368     }
2369 
2370     delete m_minimalProgram;
2371     m_minimalProgram = DE_NULL;
2372 }
2373 
2374 template <typename SampleType>
iterate(void)2375 TestCase::IterateResult BasicBufferCase<SampleType>::iterate(void)
2376 {
2377     const glw::Functions &gl    = m_context.getRenderContext().getFunctions();
2378     static bool buffersWarmedUp = false;
2379 
2380     static const uint32_t usages[] = {
2381         GL_STREAM_DRAW, GL_STREAM_READ,  GL_STREAM_COPY,  GL_STATIC_DRAW,  GL_STATIC_READ,
2382         GL_STATIC_COPY, GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
2383     };
2384 
2385     // Allocate some random sized buffers and remove them to
2386     // make sure the first samples too have some buffers removed
2387     // just before their allocation. This is only needed by the
2388     // the first test.
2389 
2390     if (m_useGL && !buffersWarmedUp)
2391     {
2392         const int numRandomBuffers = 6;
2393         const int numRepeats       = 10;
2394         const int maxBufferSize    = 16777216;
2395         const std::vector<uint8_t> zeroData(maxBufferSize, 0x00);
2396         de::Random rnd(0x1234);
2397         uint32_t bufferIDs[numRandomBuffers] = {0};
2398 
2399         gl.useProgram(m_minimalProgram->getProgram());
2400         gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
2401         gl.enableVertexAttribArray(m_minimalProgramPosLoc);
2402 
2403         for (int ndx = 0; ndx < numRepeats; ++ndx)
2404         {
2405             // Create buffer and maybe draw from it
2406             for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2407             {
2408                 const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
2409                 const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];
2410 
2411                 gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2412                 gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2413                 gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2414 
2415                 if (rnd.getBool())
2416                 {
2417                     gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2418                     gl.drawArrays(GL_POINTS, 0, 1);
2419                     gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2420                 }
2421             }
2422 
2423             for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2424                 gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2425 
2426             waitGLResults();
2427             GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2428 
2429             m_testCtx.touchWatchdog();
2430         }
2431 
2432         buffersWarmedUp = true;
2433         return CONTINUE;
2434     }
2435     else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
2436     {
2437         // Do some random buffer operations to every now and then
2438         // to make sure the previous test iterations won't affect
2439         // following test runs.
2440 
2441         const int numRandomBuffers = 3;
2442         const int maxBufferSize    = 16777216;
2443         const std::vector<uint8_t> zeroData(maxBufferSize, 0x00);
2444         de::Random rnd(0x1234 + 0xabc * m_bufferRandomizerTimer);
2445 
2446         // BufferData
2447         {
2448             uint32_t bufferIDs[numRandomBuffers] = {0};
2449 
2450             for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2451             {
2452                 const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
2453                 const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];
2454 
2455                 gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2456                 gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2457                 gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2458             }
2459 
2460             for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2461                 gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2462         }
2463 
2464         GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");
2465 
2466         // Do some memory mappings
2467         {
2468             uint32_t bufferIDs[numRandomBuffers] = {0};
2469 
2470             for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2471             {
2472                 const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
2473                 const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];
2474                 void *ptr;
2475 
2476                 gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2477                 gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2478                 gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2479 
2480                 gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2481                 gl.drawArrays(GL_POINTS, 0, 1);
2482                 gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2483 
2484                 if (rnd.getBool())
2485                     waitGLResults();
2486 
2487                 ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
2488                 if (ptr)
2489                 {
2490                     medianTimeMemcpy(ptr, &zeroData[0], randomSize);
2491                     gl.unmapBuffer(GL_ARRAY_BUFFER);
2492                 }
2493             }
2494 
2495             for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2496                 gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2497 
2498             waitGLResults();
2499         }
2500 
2501         GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
2502         return CONTINUE;
2503     }
2504     else
2505     {
2506         const int currentIteration     = m_iteration;
2507         const int sampleNdx            = m_iterationOrder[currentIteration];
2508         const bool sampleRunSuccessful = runSample(currentIteration, m_results[sampleNdx]);
2509 
2510         GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()");
2511 
2512         // Retry failed samples
2513         if (!sampleRunSuccessful)
2514             return CONTINUE;
2515 
2516         if (++m_iteration >= m_numSamples)
2517         {
2518             logAndSetTestResult(m_results);
2519             return STOP;
2520         }
2521         else
2522             return CONTINUE;
2523     }
2524 }
2525 
2526 template <typename SampleType>
disableGLWarmup(void)2527 void BasicBufferCase<SampleType>::disableGLWarmup(void)
2528 {
2529     m_useGL = false;
2530 }
2531 
2532 template <typename SampleType>
waitGLResults(void)2533 void BasicBufferCase<SampleType>::waitGLResults(void)
2534 {
2535     tcu::Surface unusedSurface(UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
2536     glu::readPixels(m_context.getRenderContext(), 0, 0, unusedSurface.getAccess());
2537 }
2538 
2539 template <typename SampleType>
2540 class BasicUploadCase : public BasicBufferCase<SampleType>
2541 {
2542 public:
2543     enum CaseType
2544     {
2545         CASE_NO_BUFFERS = 0,
2546         CASE_NEW_BUFFER,
2547         CASE_UNSPECIFIED_BUFFER,
2548         CASE_SPECIFIED_BUFFER,
2549         CASE_USED_BUFFER,
2550         CASE_USED_LARGER_BUFFER,
2551 
2552         CASE_LAST
2553     };
2554 
2555     enum CaseFlags
2556     {
2557         FLAG_DONT_LOG_BUFFER_INFO              = 0x01,
2558         FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT = 0x02,
2559     };
2560 
2561     enum ResultType
2562     {
2563         RESULT_MEDIAN_TRANSFER_RATE = 0,
2564         RESULT_ASYMPTOTIC_TRANSFER_RATE,
2565     };
2566 
2567     BasicUploadCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax,
2568                     int numSamples, uint32_t bufferUsage, CaseType caseType, ResultType resultType, int flags = 0);
2569 
2570     ~BasicUploadCase(void);
2571 
2572     virtual void init(void);
2573     virtual void deinit(void);
2574 
2575 private:
2576     bool runSample(int iteration, UploadSampleResult<SampleType> &sample);
2577     void createBuffer(int bufferSize, int iteration);
2578     void deleteBuffer(int bufferSize);
2579     void useBuffer(int bufferSize);
2580 
2581     virtual void testBufferUpload(UploadSampleResult<SampleType> &result, int writeSize) = 0;
2582     void logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results);
2583 
2584     uint32_t m_unusedBufferID;
2585 
2586 protected:
2587     const CaseType m_caseType;
2588     const ResultType m_resultType;
2589     const uint32_t m_bufferUsage;
2590     const bool m_logBufferInfo;
2591     const bool m_bufferUnspecifiedContent;
2592     std::vector<uint8_t> m_zeroData;
2593 
2594     using BasicBufferCase<SampleType>::m_testCtx;
2595     using BasicBufferCase<SampleType>::m_context;
2596 
2597     using BasicBufferCase<SampleType>::UNUSED_RENDER_AREA_SIZE;
2598     using BasicBufferCase<SampleType>::m_minimalProgram;
2599     using BasicBufferCase<SampleType>::m_minimalProgramPosLoc;
2600     using BasicBufferCase<SampleType>::m_bufferID;
2601     using BasicBufferCase<SampleType>::m_numSamples;
2602     using BasicBufferCase<SampleType>::m_bufferSizeMin;
2603     using BasicBufferCase<SampleType>::m_bufferSizeMax;
2604     using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
2605 };
2606 
2607 template <typename SampleType>
BasicUploadCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,int numSamples,uint32_t bufferUsage,CaseType caseType,ResultType resultType,int flags)2608 BasicUploadCase<SampleType>::BasicUploadCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
2609                                              int bufferSizeMax, int numSamples, uint32_t bufferUsage, CaseType caseType,
2610                                              ResultType resultType, int flags)
2611     : BasicBufferCase<SampleType>(
2612           context, name, desc, bufferSizeMin, bufferSizeMax, numSamples,
2613           (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER) : (0))
2614     , m_unusedBufferID(0)
2615     , m_caseType(caseType)
2616     , m_resultType(resultType)
2617     , m_bufferUsage(bufferUsage)
2618     , m_logBufferInfo((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0)
2619     , m_bufferUnspecifiedContent((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0)
2620     , m_zeroData()
2621 {
2622     DE_ASSERT(m_caseType < CASE_LAST);
2623 }
2624 
2625 template <typename SampleType>
~BasicUploadCase(void)2626 BasicUploadCase<SampleType>::~BasicUploadCase(void)
2627 {
2628     deinit();
2629 }
2630 
2631 template <typename SampleType>
init(void)2632 void BasicUploadCase<SampleType>::init(void)
2633 {
2634     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2635 
2636     BasicBufferCase<SampleType>::init();
2637 
2638     // zero buffer as upload source
2639     m_zeroData.resize(m_bufferSizeMax, 0x00);
2640 
2641     // unused buffer
2642 
2643     gl.genBuffers(1, &m_unusedBufferID);
2644     GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf");
2645 
2646     // log basic info
2647 
2648     m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance with " << m_numSamples
2649                        << " test samples. Sample order is randomized. All samples at even positions (first = 0) are "
2650                           "tested before samples at odd positions.\n"
2651                        << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", "
2652                        << getHumanReadableByteSize(m_bufferSizeMax) << "]." << tcu::TestLog::EndMessage;
2653 
2654     if (m_logBufferInfo)
2655     {
2656         switch (m_caseType)
2657         {
2658         case CASE_NO_BUFFERS:
2659             break;
2660 
2661         case CASE_NEW_BUFFER:
2662             m_testCtx.getLog() << tcu::TestLog::Message
2663                                << "Target buffer is generated but not specified (i.e glBufferData() not called)."
2664                                << tcu::TestLog::EndMessage;
2665             break;
2666 
2667         case CASE_UNSPECIFIED_BUFFER:
2668             m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)."
2669                                << tcu::TestLog::EndMessage;
2670             break;
2671 
2672         case CASE_SPECIFIED_BUFFER:
2673             m_testCtx.getLog() << tcu::TestLog::Message
2674                                << "Target buffer contents are specified prior testing with glBufferData(data)."
2675                                << tcu::TestLog::EndMessage;
2676             break;
2677 
2678         case CASE_USED_BUFFER:
2679             m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing."
2680                                << tcu::TestLog::EndMessage;
2681             break;
2682 
2683         case CASE_USED_LARGER_BUFFER:
2684             m_testCtx.getLog() << tcu::TestLog::Message
2685                                << "Target buffer is larger and has been used in drawing before testing."
2686                                << tcu::TestLog::EndMessage;
2687             break;
2688 
2689         default:
2690             DE_ASSERT(false);
2691             break;
2692         }
2693     }
2694 
2695     if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE)
2696         m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples."
2697                            << tcu::TestLog::EndMessage;
2698     else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE)
2699         m_testCtx.getLog() << tcu::TestLog::Message
2700                            << "Test result is the asymptotic transfer rate as the buffer size approaches infinity."
2701                            << tcu::TestLog::EndMessage;
2702     else
2703         DE_ASSERT(false);
2704 }
2705 
2706 template <typename SampleType>
deinit(void)2707 void BasicUploadCase<SampleType>::deinit(void)
2708 {
2709     if (m_unusedBufferID)
2710     {
2711         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_unusedBufferID);
2712         m_unusedBufferID = 0;
2713     }
2714 
2715     m_zeroData = std::vector<uint8_t>();
2716 
2717     BasicBufferCase<SampleType>::deinit();
2718 }
2719 
2720 template <typename SampleType>
runSample(int iteration,UploadSampleResult<SampleType> & sample)2721 bool BasicUploadCase<SampleType>::runSample(int iteration, UploadSampleResult<SampleType> &sample)
2722 {
2723     const glw::Functions &gl      = m_context.getRenderContext().getFunctions();
2724     const int allocatedBufferSize = sample.allocatedSize;
2725     const int bufferSize          = sample.bufferSize;
2726 
2727     if (m_caseType != CASE_NO_BUFFERS)
2728         createBuffer(iteration, allocatedBufferSize);
2729 
2730     // warmup CPU before the test to make sure the power management governor
2731     // keeps us in the "high performance" mode
2732     {
2733         deYield();
2734         tcu::warmupCPU();
2735         deYield();
2736     }
2737 
2738     testBufferUpload(sample, bufferSize);
2739     GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
2740 
2741     if (m_caseType != CASE_NO_BUFFERS)
2742         deleteBuffer(bufferSize);
2743 
2744     return true;
2745 }
2746 
2747 template <typename SampleType>
createBuffer(int iteration,int bufferSize)2748 void BasicUploadCase<SampleType>::createBuffer(int iteration, int bufferSize)
2749 {
2750     DE_ASSERT(!m_bufferID);
2751     DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2752 
2753     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2754 
2755     // create buffer
2756 
2757     if (m_caseType == CASE_NO_BUFFERS)
2758         return;
2759 
2760     // create empty buffer
2761 
2762     gl.genBuffers(1, &m_bufferID);
2763     gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2764     GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2765 
2766     if (m_caseType == CASE_NEW_BUFFER)
2767     {
2768         // upload something else first, this should reduce noise in samples
2769 
2770         de::Random rng(0xbadc * iteration);
2771         const int sizeDelta = rng.getInt(0, 2097140);
2772         const int unusedUploadSize =
2773             deAlign32(1048576 + sizeDelta, 4 * 4); // Vary buffer size to make sure it is always reallocated
2774         const std::vector<uint8_t> unusedData(unusedUploadSize, 0x20);
2775 
2776         gl.bindBuffer(GL_ARRAY_BUFFER, m_unusedBufferID);
2777         gl.bufferData(GL_ARRAY_BUFFER, unusedUploadSize, &unusedData[0], m_bufferUsage);
2778 
2779         // make sure upload won't interfere with the test
2780         useBuffer(unusedUploadSize);
2781 
2782         // don't kill the buffer so that the following upload cannot potentially reuse the buffer
2783 
2784         return;
2785     }
2786 
2787     // specify it
2788 
2789     if (m_caseType == CASE_UNSPECIFIED_BUFFER)
2790         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2791     else
2792     {
2793         const std::vector<uint8_t> unusedData(bufferSize, 0x20);
2794         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &unusedData[0], m_bufferUsage);
2795     }
2796 
2797     if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER)
2798         return;
2799 
2800     // use it and make sure it is uploaded
2801 
2802     useBuffer(bufferSize);
2803     DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
2804 }
2805 
2806 template <typename SampleType>
deleteBuffer(int bufferSize)2807 void BasicUploadCase<SampleType>::deleteBuffer(int bufferSize)
2808 {
2809     DE_ASSERT(m_bufferID);
2810     DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2811 
2812     // render from the buffer to make sure it actually made it to the gpu. This is to
2813     // make sure that if the upload actually happens later or is happening right now in
2814     // the background, it will not interfere with further test runs
2815 
2816     // if buffer contains unspecified content, sourcing data from it results in undefined
2817     // results, possibly including program termination. Specify all data to prevent such
2818     // case from happening
2819 
2820     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2821 
2822     gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2823 
2824     if (m_bufferUnspecifiedContent)
2825     {
2826         const std::vector<uint8_t> unusedData(bufferSize, 0x20);
2827         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &unusedData[0], m_bufferUsage);
2828 
2829         GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
2830     }
2831 
2832     useBuffer(bufferSize);
2833 
2834     gl.deleteBuffers(1, &m_bufferID);
2835     m_bufferID = 0;
2836 }
2837 
2838 template <typename SampleType>
useBuffer(int bufferSize)2839 void BasicUploadCase<SampleType>::useBuffer(int bufferSize)
2840 {
2841     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2842 
2843     gl.useProgram(m_minimalProgram->getProgram());
2844 
2845     gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
2846     gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2847     gl.enableVertexAttribArray(m_minimalProgramPosLoc);
2848 
2849     // use whole buffer to make sure buffer is uploaded by drawing first and last
2850     DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0);
2851     gl.drawArrays(GL_POINTS, 0, 1);
2852     gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1);
2853 
2854     BasicBufferCase<SampleType>::waitGLResults();
2855 }
2856 
2857 template <typename SampleType>
logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> & results)2858 void BasicUploadCase<SampleType>::logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results)
2859 {
2860     const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, true);
2861 
2862     // with small buffers, report the median transfer rate of the samples
2863     // with large buffers, report the expected preformance of infinitely large buffers
2864     const float rate = (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ? (analysis.transferRateAtInfinity) :
2865                                                                            (analysis.transferRateMedian);
2866 
2867     if (rate == std::numeric_limits<float>::infinity())
2868     {
2869         // sample times are 1) invalid or 2) timer resolution too low
2870         // report speed 0 bytes / s since real value cannot be determined
2871         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
2872     }
2873     else
2874     {
2875         // report transfer rate in MB / s
2876         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
2877     }
2878 }
2879 
2880 class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration>
2881 {
2882 public:
2883     ReferenceMemcpyCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
2884                         int numSamples, bool largeBuffersCase);
2885     ~ReferenceMemcpyCase(void);
2886 
2887     void init(void);
2888     void deinit(void);
2889 
2890 private:
2891     void testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);
2892 
2893     std::vector<uint8_t> m_dstBuf;
2894 };
2895 
ReferenceMemcpyCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,bool largeBuffersCase)2896 ReferenceMemcpyCase::ReferenceMemcpyCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
2897                                          int maxBufferSize, int numSamples, bool largeBuffersCase)
2898     : BasicUploadCase<SingleOperationDuration>(
2899           ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS,
2900           (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE))
2901     , m_dstBuf()
2902 {
2903     disableGLWarmup();
2904 }
2905 
~ReferenceMemcpyCase(void)2906 ReferenceMemcpyCase::~ReferenceMemcpyCase(void)
2907 {
2908 }
2909 
init(void)2910 void ReferenceMemcpyCase::init(void)
2911 {
2912     // Describe what the test tries to do
2913     m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage;
2914 
2915     m_dstBuf.resize(m_bufferSizeMax, 0x00);
2916 
2917     BasicUploadCase<SingleOperationDuration>::init();
2918 }
2919 
deinit(void)2920 void ReferenceMemcpyCase::deinit(void)
2921 {
2922     m_dstBuf = std::vector<uint8_t>();
2923     BasicUploadCase<SingleOperationDuration>::deinit();
2924 }
2925 
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2926 void ReferenceMemcpyCase::testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize)
2927 {
2928     // write
2929     result.duration.totalDuration       = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize);
2930     result.duration.fitResponseDuration = result.duration.totalDuration;
2931 
2932     result.writtenSize = bufferSize;
2933 }
2934 
2935 class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2936 {
2937 public:
2938     BufferDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
2939                          int numSamples, uint32_t bufferUsage, CaseType caseType);
2940     ~BufferDataUploadCase(void);
2941 
2942     void init(void);
2943 
2944 private:
2945     void testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);
2946 };
2947 
BufferDataUploadCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,uint32_t bufferUsage,CaseType caseType)2948 BufferDataUploadCase::BufferDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
2949                                            int maxBufferSize, int numSamples, uint32_t bufferUsage, CaseType caseType)
2950     : BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
2951                                                caseType, RESULT_MEDIAN_TRANSFER_RATE)
2952 {
2953 }
2954 
~BufferDataUploadCase(void)2955 BufferDataUploadCase::~BufferDataUploadCase(void)
2956 {
2957 }
2958 
init(void)2959 void BufferDataUploadCase::init(void)
2960 {
2961     // Describe what the test tries to do
2962     m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage;
2963 
2964     BasicUploadCase<SingleOperationDuration>::init();
2965 }
2966 
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2967 void BufferDataUploadCase::testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize)
2968 {
2969     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
2970 
2971     gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2972 
2973     // upload
2974     {
2975         uint64_t startTime;
2976         uint64_t endTime;
2977 
2978         startTime = deGetMicroseconds();
2979         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2980         endTime = deGetMicroseconds();
2981 
2982         result.duration.totalDuration       = endTime - startTime;
2983         result.duration.fitResponseDuration = result.duration.totalDuration;
2984         result.writtenSize                  = bufferSize;
2985     }
2986 }
2987 
2988 class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2989 {
2990 public:
2991     enum Flags
2992     {
2993         FLAG_FULL_UPLOAD           = 0x01,
2994         FLAG_PARTIAL_UPLOAD        = 0x02,
2995         FLAG_INVALIDATE_BEFORE_USE = 0x04,
2996     };
2997 
2998     BufferSubDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
2999                             int numSamples, uint32_t bufferUsage, CaseType parentCase, int flags);
3000     ~BufferSubDataUploadCase(void);
3001 
3002     void init(void);
3003 
3004 private:
3005     void testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);
3006 
3007     const bool m_fullUpload;
3008     const bool m_invalidateBeforeUse;
3009 };
3010 
BufferSubDataUploadCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,uint32_t bufferUsage,CaseType parentCase,int flags)3011 BufferSubDataUploadCase::BufferSubDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
3012                                                  int maxBufferSize, int numSamples, uint32_t bufferUsage,
3013                                                  CaseType parentCase, int flags)
3014     : BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
3015                                                parentCase, RESULT_MEDIAN_TRANSFER_RATE)
3016     , m_fullUpload((flags & FLAG_FULL_UPLOAD) != 0)
3017     , m_invalidateBeforeUse((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
3018 {
3019     DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
3020     DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
3021 }
3022 
~BufferSubDataUploadCase(void)3023 BufferSubDataUploadCase::~BufferSubDataUploadCase(void)
3024 {
3025 }
3026 
init(void)3027 void BufferSubDataUploadCase::init(void)
3028 {
3029     // Describe what the test tries to do
3030     m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferSubData() function call performance. "
3031                        << ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") :
3032                                             ("Half of the buffer data is updated with glBufferSubData. "))
3033                        << ((m_invalidateBeforeUse) ?
3034                                ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") :
3035                                (""))
3036                        << "\n"
3037                        << tcu::TestLog::EndMessage;
3038 
3039     BasicUploadCase<SingleOperationDuration>::init();
3040 }
3041 
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3042 void BufferSubDataUploadCase::testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize)
3043 {
3044     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3045 
3046     gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3047 
3048     // "invalidate", upload null
3049     if (m_invalidateBeforeUse)
3050         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3051 
3052     // upload
3053     {
3054         uint64_t startTime;
3055         uint64_t endTime;
3056 
3057         startTime = deGetMicroseconds();
3058 
3059         if (m_fullUpload)
3060             gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);
3061         else
3062         {
3063             // upload to buffer center
3064             gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
3065         }
3066 
3067         endTime = deGetMicroseconds();
3068 
3069         result.duration.totalDuration       = endTime - startTime;
3070         result.duration.fitResponseDuration = result.duration.totalDuration;
3071 
3072         if (m_fullUpload)
3073             result.writtenSize = bufferSize;
3074         else
3075             result.writtenSize = bufferSize / 2;
3076     }
3077 }
3078 
3079 class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration>
3080 {
3081 public:
3082     enum Flags
3083     {
3084         FLAG_PARTIAL                       = 0x01,
3085         FLAG_MANUAL_INVALIDATION           = 0x02,
3086         FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04,
3087         FLAG_USE_UNUSED_SPECIFIED_BUFFER   = 0x08,
3088     };
3089 
3090     MapBufferRangeCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
3091                        int numSamples, uint32_t bufferUsage, uint32_t mapFlags, int caseFlags);
3092     ~MapBufferRangeCase(void);
3093 
3094     void init(void);
3095 
3096 private:
3097     static CaseType getBaseCaseType(int caseFlags);
3098     static int getBaseFlags(uint32_t mapFlags, int caseFlags);
3099 
3100     void testBufferUpload(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize);
3101     void attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize);
3102 
3103     const bool m_manualInvalidation;
3104     const bool m_fullUpload;
3105     const bool m_useUnusedUnspecifiedBuffer;
3106     const bool m_useUnusedSpecifiedBuffer;
3107     const uint32_t m_mapFlags;
3108     int m_unmapFailures;
3109 };
3110 
MapBufferRangeCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,uint32_t bufferUsage,uint32_t mapFlags,int caseFlags)3111 MapBufferRangeCase::MapBufferRangeCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
3112                                        int maxBufferSize, int numSamples, uint32_t bufferUsage, uint32_t mapFlags,
3113                                        int caseFlags)
3114     : BasicUploadCase<MapBufferRangeDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
3115                                               getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE,
3116                                               getBaseFlags(mapFlags, caseFlags))
3117     , m_manualInvalidation((caseFlags & FLAG_MANUAL_INVALIDATION) != 0)
3118     , m_fullUpload((caseFlags & FLAG_PARTIAL) == 0)
3119     , m_useUnusedUnspecifiedBuffer((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
3120     , m_useUnusedSpecifiedBuffer((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
3121     , m_mapFlags(mapFlags)
3122     , m_unmapFailures(0)
3123 {
3124     DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer));
3125     DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation));
3126 }
3127 
~MapBufferRangeCase(void)3128 MapBufferRangeCase::~MapBufferRangeCase(void)
3129 {
3130 }
3131 
init(void)3132 void MapBufferRangeCase::init(void)
3133 {
3134     // Describe what the test tries to do
3135     m_testCtx.getLog()
3136         << tcu::TestLog::Message << "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
3137         << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
3138         << ((m_useUnusedUnspecifiedBuffer) ?
3139                 ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") :
3140                 (""))
3141         << ((m_useUnusedSpecifiedBuffer) ?
3142                 ("The buffer has not been used before mapping and is allocated with specified contents.\n") :
3143                 (""))
3144         << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ?
3145                 ("The buffer has previously been used in a drawing operation.\n") :
3146                 (""))
3147         << ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : (""))
3148         << "Map bits:\n"
3149         << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3150         << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3151         << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3152         << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3153         << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3154         << tcu::TestLog::EndMessage;
3155 
3156     BasicUploadCase<MapBufferRangeDuration>::init();
3157 }
3158 
getBaseCaseType(int caseFlags)3159 MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType(int caseFlags)
3160 {
3161     if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
3162         return CASE_USED_BUFFER;
3163     else
3164         return CASE_NEW_BUFFER;
3165 }
3166 
getBaseFlags(uint32_t mapFlags,int caseFlags)3167 int MapBufferRangeCase::getBaseFlags(uint32_t mapFlags, int caseFlags)
3168 {
3169     int flags = FLAG_DONT_LOG_BUFFER_INFO;
3170 
3171     // If buffer contains unspecified data when it is sourced (i.e drawn)
3172     // results are undefined, and system errors may occur. Signal parent
3173     // class to take this into account
3174     if (caseFlags & FLAG_PARTIAL)
3175     {
3176         if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || (caseFlags & FLAG_MANUAL_INVALIDATION) != 0 ||
3177             (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
3178         {
3179             flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
3180         }
3181     }
3182 
3183     return flags;
3184 }
3185 
testBufferUpload(UploadSampleResult<MapBufferRangeDuration> & result,int bufferSize)3186 void MapBufferRangeCase::testBufferUpload(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize)
3187 {
3188     const int unmapFailureThreshold = 4;
3189 
3190     for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
3191     {
3192         try
3193         {
3194             attemptBufferMap(result, bufferSize);
3195             return;
3196         }
3197         catch (UnmapFailureError &)
3198         {
3199         }
3200     }
3201 
3202     throw tcu::TestError("Unmapping failures exceeded limit");
3203 }
3204 
attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> & result,int bufferSize)3205 void MapBufferRangeCase::attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize)
3206 {
3207     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3208 
3209     gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3210 
3211     if (m_fullUpload)
3212         result.writtenSize = bufferSize;
3213     else
3214         result.writtenSize = bufferSize / 2;
3215 
3216     // Create unused buffer
3217 
3218     if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer)
3219     {
3220         uint64_t startTime;
3221         uint64_t endTime;
3222 
3223         // "invalidate" or allocate, upload null
3224         startTime = deGetMicroseconds();
3225         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3226         endTime = deGetMicroseconds();
3227 
3228         result.duration.allocDuration = endTime - startTime;
3229     }
3230     else if (m_useUnusedSpecifiedBuffer)
3231     {
3232         uint64_t startTime;
3233         uint64_t endTime;
3234 
3235         // Specify buffer contents
3236         startTime = deGetMicroseconds();
3237         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3238         endTime = deGetMicroseconds();
3239 
3240         result.duration.allocDuration = endTime - startTime;
3241     }
3242     else
3243     {
3244         // No alloc, no time
3245         result.duration.allocDuration = 0;
3246     }
3247 
3248     // upload
3249     {
3250         void *mapPtr;
3251 
3252         // Map
3253         {
3254             uint64_t startTime;
3255             uint64_t endTime;
3256 
3257             startTime = deGetMicroseconds();
3258             if (m_fullUpload)
3259                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags);
3260             else
3261             {
3262                 // upload to buffer center
3263                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags);
3264             }
3265             endTime = deGetMicroseconds();
3266 
3267             if (!mapPtr)
3268                 throw tcu::Exception("MapBufferRange returned NULL");
3269 
3270             result.duration.mapDuration = endTime - startTime;
3271         }
3272 
3273         // Write
3274         {
3275             result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
3276         }
3277 
3278         // Unmap
3279         {
3280             uint64_t startTime;
3281             uint64_t endTime;
3282             glw::GLboolean unmapSuccessful;
3283 
3284             startTime       = deGetMicroseconds();
3285             unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
3286             endTime         = deGetMicroseconds();
3287 
3288             // if unmapping fails, just try again later
3289             if (!unmapSuccessful)
3290                 throw UnmapFailureError();
3291 
3292             result.duration.unmapDuration = endTime - startTime;
3293         }
3294 
3295         result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
3296                                         result.duration.unmapDuration + result.duration.allocDuration;
3297         result.duration.fitResponseDuration = result.duration.totalDuration;
3298     }
3299 }
3300 
3301 class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration>
3302 {
3303 public:
3304     enum Flags
3305     {
3306         FLAG_PARTIAL                       = 0x01,
3307         FLAG_FLUSH_IN_PARTS                = 0x02,
3308         FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04,
3309         FLAG_USE_UNUSED_SPECIFIED_BUFFER   = 0x08,
3310         FLAG_FLUSH_PARTIAL                 = 0x10,
3311     };
3312 
3313     MapBufferRangeFlushCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
3314                             int numSamples, uint32_t bufferUsage, uint32_t mapFlags, int caseFlags);
3315     ~MapBufferRangeFlushCase(void);
3316 
3317     void init(void);
3318 
3319 private:
3320     static CaseType getBaseCaseType(int caseFlags);
3321     static int getBaseFlags(uint32_t mapFlags, int caseFlags);
3322 
3323     void testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize);
3324     void attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize);
3325 
3326     const bool m_fullUpload;
3327     const bool m_flushInParts;
3328     const bool m_flushPartial;
3329     const bool m_useUnusedUnspecifiedBuffer;
3330     const bool m_useUnusedSpecifiedBuffer;
3331     const uint32_t m_mapFlags;
3332     int m_unmapFailures;
3333 };
3334 
MapBufferRangeFlushCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,uint32_t bufferUsage,uint32_t mapFlags,int caseFlags)3335 MapBufferRangeFlushCase::MapBufferRangeFlushCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
3336                                                  int maxBufferSize, int numSamples, uint32_t bufferUsage,
3337                                                  uint32_t mapFlags, int caseFlags)
3338     : BasicUploadCase<MapBufferRangeFlushDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples,
3339                                                    bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE,
3340                                                    getBaseFlags(mapFlags, caseFlags))
3341     , m_fullUpload((caseFlags & FLAG_PARTIAL) == 0)
3342     , m_flushInParts((caseFlags & FLAG_FLUSH_IN_PARTS) != 0)
3343     , m_flushPartial((caseFlags & FLAG_FLUSH_PARTIAL) != 0)
3344     , m_useUnusedUnspecifiedBuffer((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
3345     , m_useUnusedSpecifiedBuffer((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
3346     , m_mapFlags(mapFlags)
3347     , m_unmapFailures(0)
3348 {
3349     DE_ASSERT(!(m_flushPartial && m_flushInParts));
3350     DE_ASSERT(!(m_flushPartial && !m_fullUpload));
3351 }
3352 
~MapBufferRangeFlushCase(void)3353 MapBufferRangeFlushCase::~MapBufferRangeFlushCase(void)
3354 {
3355 }
3356 
init(void)3357 void MapBufferRangeFlushCase::init(void)
3358 {
3359     // Describe what the test tries to do
3360     m_testCtx.getLog()
3361         << tcu::TestLog::Message
3362         << "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n"
3363         << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
3364         << ((m_flushInParts) ?
3365                 ("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") :
3366             (m_flushPartial) ? ("Half of the buffer range is flushed.") :
3367                                ("The whole mapped range is flushed in one flush call."))
3368         << "\n"
3369         << ((m_useUnusedUnspecifiedBuffer) ?
3370                 ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") :
3371                 (""))
3372         << ((m_useUnusedSpecifiedBuffer) ?
3373                 ("The buffer has not been used before mapping and is allocated with specified contents.\n") :
3374                 (""))
3375         << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ?
3376                 ("The buffer has previously been used in a drawing operation.\n") :
3377                 (""))
3378         << "Map bits:\n"
3379         << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3380         << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3381         << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3382         << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3383         << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3384         << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3385         << tcu::TestLog::EndMessage;
3386 
3387     BasicUploadCase<MapBufferRangeFlushDuration>::init();
3388 }
3389 
getBaseCaseType(int caseFlags)3390 MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType(int caseFlags)
3391 {
3392     if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
3393         return CASE_USED_BUFFER;
3394     else
3395         return CASE_NEW_BUFFER;
3396 }
3397 
getBaseFlags(uint32_t mapFlags,int caseFlags)3398 int MapBufferRangeFlushCase::getBaseFlags(uint32_t mapFlags, int caseFlags)
3399 {
3400     int flags = FLAG_DONT_LOG_BUFFER_INFO;
3401 
3402     // If buffer contains unspecified data when it is sourced (i.e drawn)
3403     // results are undefined, and system errors may occur. Signal parent
3404     // class to take this into account
3405     if (caseFlags & FLAG_PARTIAL)
3406     {
3407         if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0 ||
3408             (caseFlags & FLAG_FLUSH_PARTIAL) != 0)
3409         {
3410             flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
3411         }
3412     }
3413 
3414     return flags;
3415 }
3416 
testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> & result,int bufferSize)3417 void MapBufferRangeFlushCase::testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize)
3418 {
3419     const int unmapFailureThreshold = 4;
3420 
3421     for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
3422     {
3423         try
3424         {
3425             attemptBufferMap(result, bufferSize);
3426             return;
3427         }
3428         catch (UnmapFailureError &)
3429         {
3430         }
3431     }
3432 
3433     throw tcu::TestError("Unmapping failures exceeded limit");
3434 }
3435 
attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> & result,int bufferSize)3436 void MapBufferRangeFlushCase::attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize)
3437 {
3438     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3439     const int mappedSize     = (m_fullUpload) ? (bufferSize) : (bufferSize / 2);
3440 
3441     if (m_fullUpload && !m_flushPartial)
3442         result.writtenSize = bufferSize;
3443     else
3444         result.writtenSize = bufferSize / 2;
3445 
3446     gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3447 
3448     // Create unused buffer
3449 
3450     if (m_useUnusedUnspecifiedBuffer)
3451     {
3452         uint64_t startTime;
3453         uint64_t endTime;
3454 
3455         // Don't specify contents
3456         startTime = deGetMicroseconds();
3457         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3458         endTime = deGetMicroseconds();
3459 
3460         result.duration.allocDuration = endTime - startTime;
3461     }
3462     else if (m_useUnusedSpecifiedBuffer)
3463     {
3464         uint64_t startTime;
3465         uint64_t endTime;
3466 
3467         // Specify buffer contents
3468         startTime = deGetMicroseconds();
3469         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3470         endTime = deGetMicroseconds();
3471 
3472         result.duration.allocDuration = endTime - startTime;
3473     }
3474     else
3475     {
3476         // No alloc, no time
3477         result.duration.allocDuration = 0;
3478     }
3479 
3480     // upload
3481     {
3482         void *mapPtr;
3483 
3484         // Map
3485         {
3486             uint64_t startTime;
3487             uint64_t endTime;
3488 
3489             startTime = deGetMicroseconds();
3490             if (m_fullUpload)
3491                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags);
3492             else
3493             {
3494                 // upload to buffer center
3495                 mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags);
3496             }
3497             endTime = deGetMicroseconds();
3498 
3499             if (!mapPtr)
3500                 throw tcu::Exception("MapBufferRange returned NULL");
3501 
3502             result.duration.mapDuration = endTime - startTime;
3503         }
3504 
3505         // Write
3506         {
3507             if (!m_flushPartial)
3508                 result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
3509             else
3510                 result.duration.writeDuration =
3511                     medianTimeMemcpy((uint8_t *)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize);
3512         }
3513 
3514         // Flush
3515         {
3516             uint64_t startTime;
3517             uint64_t endTime;
3518 
3519             startTime = deGetMicroseconds();
3520 
3521             if (m_flushPartial)
3522                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize / 4, mappedSize / 2);
3523             else if (!m_flushInParts)
3524                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize);
3525             else
3526             {
3527                 const int p1 = 0;
3528                 const int p2 = mappedSize / 3;
3529                 const int p3 = mappedSize / 2;
3530                 const int p4 = mappedSize * 2 / 4;
3531                 const int p5 = mappedSize;
3532 
3533                 // flush in mixed order
3534                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2, p3 - p2);
3535                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1, p2 - p1);
3536                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4, p5 - p4);
3537                 gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3, p4 - p3);
3538             }
3539 
3540             endTime = deGetMicroseconds();
3541 
3542             result.duration.flushDuration = endTime - startTime;
3543         }
3544 
3545         // Unmap
3546         {
3547             uint64_t startTime;
3548             uint64_t endTime;
3549             glw::GLboolean unmapSuccessful;
3550 
3551             startTime       = deGetMicroseconds();
3552             unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
3553             endTime         = deGetMicroseconds();
3554 
3555             // if unmapping fails, just try again later
3556             if (!unmapSuccessful)
3557                 throw UnmapFailureError();
3558 
3559             result.duration.unmapDuration = endTime - startTime;
3560         }
3561 
3562         result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
3563                                         result.duration.flushDuration + result.duration.unmapDuration +
3564                                         result.duration.allocDuration;
3565         result.duration.fitResponseDuration = result.duration.totalDuration;
3566     }
3567 }
3568 
3569 template <typename SampleType>
3570 class ModifyAfterBasicCase : public BasicBufferCase<SampleType>
3571 {
3572 public:
3573     ModifyAfterBasicCase(Context &context, const char *name, const char *description, int bufferSizeMin,
3574                          int bufferSizeMax, uint32_t usage, bool bufferUnspecifiedAfterTest);
3575     ~ModifyAfterBasicCase(void);
3576 
3577     void init(void);
3578     void deinit(void);
3579 
3580 protected:
3581     void drawBufferRange(int begin, int end);
3582 
3583 private:
3584     enum
3585     {
3586         NUM_SAMPLES = 20,
3587     };
3588 
3589     bool runSample(int iteration, UploadSampleResult<SampleType> &sample);
3590     bool prepareAndRunTest(int iteration, UploadSampleResult<SampleType> &result, int bufferSize);
3591     void logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results);
3592 
3593     virtual void testWithBufferSize(UploadSampleResult<SampleType> &result, int bufferSize) = 0;
3594 
3595     int m_unmappingErrors;
3596 
3597 protected:
3598     const bool m_bufferUnspecifiedAfterTest;
3599     const uint32_t m_bufferUsage;
3600     std::vector<uint8_t> m_zeroData;
3601 
3602     using BasicBufferCase<SampleType>::m_testCtx;
3603     using BasicBufferCase<SampleType>::m_context;
3604 
3605     using BasicBufferCase<SampleType>::UNUSED_RENDER_AREA_SIZE;
3606     using BasicBufferCase<SampleType>::m_minimalProgram;
3607     using BasicBufferCase<SampleType>::m_minimalProgramPosLoc;
3608     using BasicBufferCase<SampleType>::m_bufferID;
3609     using BasicBufferCase<SampleType>::m_numSamples;
3610     using BasicBufferCase<SampleType>::m_bufferSizeMin;
3611     using BasicBufferCase<SampleType>::m_bufferSizeMax;
3612     using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
3613 };
3614 
3615 template <typename SampleType>
ModifyAfterBasicCase(Context & context,const char * name,const char * description,int bufferSizeMin,int bufferSizeMax,uint32_t usage,bool bufferUnspecifiedAfterTest)3616 ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase(Context &context, const char *name, const char *description,
3617                                                        int bufferSizeMin, int bufferSizeMax, uint32_t usage,
3618                                                        bool bufferUnspecifiedAfterTest)
3619     : BasicBufferCase<SampleType>(context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0)
3620     , m_unmappingErrors(0)
3621     , m_bufferUnspecifiedAfterTest(bufferUnspecifiedAfterTest)
3622     , m_bufferUsage(usage)
3623     , m_zeroData()
3624 {
3625 }
3626 
3627 template <typename SampleType>
~ModifyAfterBasicCase(void)3628 ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase(void)
3629 {
3630     BasicBufferCase<SampleType>::deinit();
3631 }
3632 
3633 template <typename SampleType>
init(void)3634 void ModifyAfterBasicCase<SampleType>::init(void)
3635 {
3636     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3637 
3638     // init parent
3639 
3640     BasicBufferCase<SampleType>::init();
3641 
3642     // upload source
3643     m_zeroData.resize(m_bufferSizeMax, 0x00);
3644 
3645     // log basic info
3646 
3647     m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance with " << (int)NUM_SAMPLES
3648                        << " test samples. Sample order is randomized. All samples at even positions (first = 0) are "
3649                           "tested before samples at odd positions.\n"
3650                        << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", "
3651                        << getHumanReadableByteSize(m_bufferSizeMax) << "]." << tcu::TestLog::EndMessage;
3652 
3653     // log which transfer rate is the test result and buffer info
3654 
3655     m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples.\n"
3656                        << "Buffer usage = " << glu::getUsageName(m_bufferUsage) << tcu::TestLog::EndMessage;
3657 
3658     // Set state for drawing so that we don't have to change these during the iteration
3659     {
3660         gl.useProgram(m_minimalProgram->getProgram());
3661         gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
3662         gl.enableVertexAttribArray(m_minimalProgramPosLoc);
3663     }
3664 }
3665 
3666 template <typename SampleType>
deinit(void)3667 void ModifyAfterBasicCase<SampleType>::deinit(void)
3668 {
3669     m_zeroData = std::vector<uint8_t>();
3670 
3671     BasicBufferCase<SampleType>::deinit();
3672 }
3673 
3674 template <typename SampleType>
drawBufferRange(int begin,int end)3675 void ModifyAfterBasicCase<SampleType>::drawBufferRange(int begin, int end)
3676 {
3677     DE_ASSERT(begin % (int)sizeof(float[4]) == 0);
3678     DE_ASSERT(end % (int)sizeof(float[4]) == 0);
3679 
3680     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3681 
3682     // use given range
3683     gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1);
3684     gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1);
3685 }
3686 
3687 template <typename SampleType>
runSample(int iteration,UploadSampleResult<SampleType> & sample)3688 bool ModifyAfterBasicCase<SampleType>::runSample(int iteration, UploadSampleResult<SampleType> &sample)
3689 {
3690     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3691     const int bufferSize     = sample.bufferSize;
3692     bool testOk;
3693 
3694     testOk = prepareAndRunTest(iteration, sample, bufferSize);
3695     GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
3696 
3697     if (!testOk)
3698     {
3699         const int unmapFailureThreshold = 4;
3700 
3701         // only unmapping error can cause iteration failure
3702         if (++m_unmappingErrors >= unmapFailureThreshold)
3703             throw tcu::TestError("Too many unmapping errors, cannot continue.");
3704 
3705         // just try again
3706         return false;
3707     }
3708 
3709     return true;
3710 }
3711 
3712 template <typename SampleType>
prepareAndRunTest(int iteration,UploadSampleResult<SampleType> & result,int bufferSize)3713 bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest(int iteration, UploadSampleResult<SampleType> &result,
3714                                                          int bufferSize)
3715 {
3716     DE_UNREF(iteration);
3717 
3718     DE_ASSERT(!m_bufferID);
3719     DE_ASSERT(deIsAligned32(bufferSize, 4 * 4)); // aligned to vec4
3720 
3721     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3722     bool testRunOk           = true;
3723     bool unmappingFailed     = false;
3724 
3725     // Upload initial buffer to the GPU...
3726     gl.genBuffers(1, &m_bufferID);
3727     gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3728     gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3729 
3730     // ...use it...
3731     gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
3732     drawBufferRange(0, bufferSize);
3733 
3734     // ..and make sure it is uploaded
3735     BasicBufferCase<SampleType>::waitGLResults();
3736 
3737     // warmup CPU before the test to make sure the power management governor
3738     // keeps us in the "high performance" mode
3739     {
3740         deYield();
3741         tcu::warmupCPU();
3742         deYield();
3743     }
3744 
3745     // test
3746     try
3747     {
3748         // buffer is uploaded to the GPU. Draw from it.
3749         drawBufferRange(0, bufferSize);
3750 
3751         // and test upload
3752         testWithBufferSize(result, bufferSize);
3753     }
3754     catch (UnmapFailureError &)
3755     {
3756         testRunOk       = false;
3757         unmappingFailed = true;
3758     }
3759 
3760     // clean up: make sure buffer is not in upload queue and delete it
3761 
3762     // sourcing unspecified data causes undefined results, possibly program termination
3763     if (m_bufferUnspecifiedAfterTest || unmappingFailed)
3764         gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3765 
3766     drawBufferRange(0, bufferSize);
3767     BasicBufferCase<SampleType>::waitGLResults();
3768 
3769     gl.deleteBuffers(1, &m_bufferID);
3770     m_bufferID = 0;
3771 
3772     return testRunOk;
3773 }
3774 
3775 template <typename SampleType>
logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> & results)3776 void ModifyAfterBasicCase<SampleType>::logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results)
3777 {
3778     const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false);
3779 
3780     // Return median transfer rate of the samples
3781 
3782     if (analysis.transferRateMedian == std::numeric_limits<float>::infinity())
3783     {
3784         // sample times are 1) invalid or 2) timer resolution too low
3785         // report speed 0 bytes / s since real value cannot be determined
3786         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
3787     }
3788     else
3789     {
3790         // report transfer rate in MB / s
3791         m_testCtx.setTestResult(QP_TEST_RESULT_PASS,
3792                                 de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str());
3793     }
3794 }
3795 
3796 class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3797 {
3798 public:
3799     enum CaseFlags
3800     {
3801         FLAG_RESPECIFY_SIZE  = 0x1,
3802         FLAG_UPLOAD_REPEATED = 0x2,
3803     };
3804 
3805     ModifyAfterWithBufferDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
3806                                   int bufferSizeMax, uint32_t usage, int flags);
3807     ~ModifyAfterWithBufferDataCase(void);
3808 
3809     void init(void);
3810     void deinit(void);
3811 
3812 private:
3813     void testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);
3814 
3815     enum
3816     {
3817         NUM_REPEATS = 2
3818     };
3819 
3820     const bool m_respecifySize;
3821     const bool m_repeatedUpload;
3822     const float m_sizeDifferenceFactor;
3823 };
3824 
ModifyAfterWithBufferDataCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,uint32_t usage,int flags)3825 ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase(Context &context, const char *name, const char *desc,
3826                                                              int bufferSizeMin, int bufferSizeMax, uint32_t usage,
3827                                                              int flags)
3828     : ModifyAfterBasicCase<SingleOperationDuration>(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3829     , m_respecifySize((flags & FLAG_RESPECIFY_SIZE) != 0)
3830     , m_repeatedUpload((flags & FLAG_UPLOAD_REPEATED) != 0)
3831     , m_sizeDifferenceFactor(1.3f)
3832 {
3833     DE_ASSERT(!(m_repeatedUpload && m_respecifySize));
3834 }
3835 
~ModifyAfterWithBufferDataCase(void)3836 ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase(void)
3837 {
3838     deinit();
3839 }
3840 
init(void)3841 void ModifyAfterWithBufferDataCase::init(void)
3842 {
3843     // Log the purpose of the test
3844 
3845     if (m_repeatedUpload)
3846         m_testCtx.getLog() << tcu::TestLog::Message
3847                            << "Testing performance of BufferData() command after \"specify buffer contents - draw "
3848                               "buffer\" command pair is repeated "
3849                            << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3850     else
3851         m_testCtx.getLog() << tcu::TestLog::Message
3852                            << "Testing performance of BufferData() command after a draw command that sources data from "
3853                               "the target buffer."
3854                            << tcu::TestLog::EndMessage;
3855 
3856     m_testCtx.getLog() << tcu::TestLog::Message
3857                        << ((m_respecifySize) ?
3858                                ("Buffer size is increased and contents are modified with BufferData().\n") :
3859                                ("Buffer contents are modified with BufferData().\n"))
3860                        << tcu::TestLog::EndMessage;
3861 
3862     // init parent
3863     ModifyAfterBasicCase<SingleOperationDuration>::init();
3864 
3865     // make sure our zeroBuffer is large enough
3866     if (m_respecifySize)
3867     {
3868         const int largerBufferSize = deAlign32((int)((float)m_bufferSizeMax * m_sizeDifferenceFactor), 4 * 4);
3869         m_zeroData.resize(largerBufferSize, 0x00);
3870     }
3871 }
3872 
deinit(void)3873 void ModifyAfterWithBufferDataCase::deinit(void)
3874 {
3875     ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3876 }
3877 
testWithBufferSize(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3878 void ModifyAfterWithBufferDataCase::testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result,
3879                                                        int bufferSize)
3880 {
3881     // always draw the same amount to make compares between cases sensible
3882     const int drawStart = deAlign32(bufferSize / 4, 4 * 4);
3883     const int drawEnd   = deAlign32(bufferSize * 3 / 4, 4 * 4);
3884 
3885     const glw::Functions &gl   = m_context.getRenderContext().getFunctions();
3886     const int largerBufferSize = deAlign32((int)((float)bufferSize * m_sizeDifferenceFactor), 4 * 4);
3887     const int newBufferSize    = (m_respecifySize) ? (largerBufferSize) : (bufferSize);
3888     uint64_t startTime;
3889     uint64_t endTime;
3890 
3891     // repeat upload-draw
3892     if (m_repeatedUpload)
3893     {
3894         for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3895         {
3896             gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3897             drawBufferRange(drawStart, drawEnd);
3898         }
3899     }
3900 
3901     // test upload
3902     startTime = deGetMicroseconds();
3903     gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3904     endTime = deGetMicroseconds();
3905 
3906     result.duration.totalDuration       = endTime - startTime;
3907     result.duration.fitResponseDuration = result.duration.totalDuration;
3908     result.writtenSize                  = newBufferSize;
3909 }
3910 
3911 class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3912 {
3913 public:
3914     enum CaseFlags
3915     {
3916         FLAG_PARTIAL         = 0x1,
3917         FLAG_UPLOAD_REPEATED = 0x2,
3918     };
3919 
3920     ModifyAfterWithBufferSubDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
3921                                      int bufferSizeMax, uint32_t usage, int flags);
3922     ~ModifyAfterWithBufferSubDataCase(void);
3923 
3924     void init(void);
3925     void deinit(void);
3926 
3927 private:
3928     void testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);
3929 
3930     enum
3931     {
3932         NUM_REPEATS = 2
3933     };
3934 
3935     const bool m_partialUpload;
3936     const bool m_repeatedUpload;
3937 };
3938 
ModifyAfterWithBufferSubDataCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,uint32_t usage,int flags)3939 ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase(Context &context, const char *name, const char *desc,
3940                                                                    int bufferSizeMin, int bufferSizeMax, uint32_t usage,
3941                                                                    int flags)
3942     : ModifyAfterBasicCase<SingleOperationDuration>(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3943     , m_partialUpload((flags & FLAG_PARTIAL) != 0)
3944     , m_repeatedUpload((flags & FLAG_UPLOAD_REPEATED) != 0)
3945 {
3946 }
3947 
~ModifyAfterWithBufferSubDataCase(void)3948 ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase(void)
3949 {
3950     deinit();
3951 }
3952 
init(void)3953 void ModifyAfterWithBufferSubDataCase::init(void)
3954 {
3955     // Log the purpose of the test
3956 
3957     if (m_repeatedUpload)
3958         m_testCtx.getLog() << tcu::TestLog::Message
3959                            << "Testing performance of BufferSubData() command after \"specify buffer contents - draw "
3960                               "buffer\" command pair is repeated "
3961                            << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3962     else
3963         m_testCtx.getLog() << tcu::TestLog::Message
3964                            << "Testing performance of BufferSubData() command after a draw command that sources data "
3965                               "from the target buffer."
3966                            << tcu::TestLog::EndMessage;
3967 
3968     m_testCtx.getLog() << tcu::TestLog::Message
3969                        << ((m_partialUpload) ? ("Half of the buffer contents are modified.\n") :
3970                                                ("Buffer contents are fully respecified.\n"))
3971                        << tcu::TestLog::EndMessage;
3972 
3973     ModifyAfterBasicCase<SingleOperationDuration>::init();
3974 }
3975 
deinit(void)3976 void ModifyAfterWithBufferSubDataCase::deinit(void)
3977 {
3978     ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3979 }
3980 
testWithBufferSize(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3981 void ModifyAfterWithBufferSubDataCase::testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result,
3982                                                           int bufferSize)
3983 {
3984     // always draw the same amount to make compares between cases sensible
3985     const int drawStart = deAlign32(bufferSize / 4, 4 * 4);
3986     const int drawEnd   = deAlign32(bufferSize * 3 / 4, 4 * 4);
3987 
3988     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
3989     const int subdataOffset  = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
3990     const int subdataSize    = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4 * 4);
3991     uint64_t startTime;
3992     uint64_t endTime;
3993 
3994     // make upload-draw stream
3995     if (m_repeatedUpload)
3996     {
3997         for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3998         {
3999             gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
4000             drawBufferRange(drawStart, drawEnd);
4001         }
4002     }
4003 
4004     // test upload
4005     startTime = deGetMicroseconds();
4006     gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
4007     endTime = deGetMicroseconds();
4008 
4009     result.duration.totalDuration       = endTime - startTime;
4010     result.duration.fitResponseDuration = result.duration.totalDuration;
4011     result.writtenSize                  = subdataSize;
4012 }
4013 
4014 class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>
4015 {
4016 public:
4017     enum CaseFlags
4018     {
4019         FLAG_PARTIAL = 0x1,
4020     };
4021 
4022     ModifyAfterWithMapBufferRangeCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
4023                                       int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags);
4024     ~ModifyAfterWithMapBufferRangeCase(void);
4025 
4026     void init(void);
4027     void deinit(void);
4028 
4029 private:
4030     static bool isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags);
4031     void testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> &result, int bufferSize);
4032 
4033     const bool m_partialUpload;
4034     const uint32_t m_mapFlags;
4035 };
4036 
ModifyAfterWithMapBufferRangeCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,uint32_t usage,int flags,uint32_t glMapFlags)4037 ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase(Context &context, const char *name,
4038                                                                      const char *desc, int bufferSizeMin,
4039                                                                      int bufferSizeMax, uint32_t usage, int flags,
4040                                                                      uint32_t glMapFlags)
4041     : ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>(context, name, desc, bufferSizeMin, bufferSizeMax, usage,
4042                                                           isBufferUnspecifiedAfterUpload(flags, glMapFlags))
4043     , m_partialUpload((flags & FLAG_PARTIAL) != 0)
4044     , m_mapFlags(glMapFlags)
4045 {
4046 }
4047 
~ModifyAfterWithMapBufferRangeCase(void)4048 ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase(void)
4049 {
4050     deinit();
4051 }
4052 
init(void)4053 void ModifyAfterWithMapBufferRangeCase::init(void)
4054 {
4055     // Log the purpose of the test
4056 
4057     m_testCtx.getLog() << tcu::TestLog::Message
4058                        << "Testing performance of MapBufferRange() command after a draw command that sources data from "
4059                           "the target buffer.\n"
4060                        << ((m_partialUpload) ? ("Half of the buffer is mapped.\n") : ("Whole buffer is mapped.\n"))
4061                        << "Map bits:\n"
4062                        << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
4063                        << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
4064                        << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
4065                        << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
4066                        << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
4067                        << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
4068                        << tcu::TestLog::EndMessage;
4069 
4070     ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init();
4071 }
4072 
deinit(void)4073 void ModifyAfterWithMapBufferRangeCase::deinit(void)
4074 {
4075     ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit();
4076 }
4077 
isBufferUnspecifiedAfterUpload(int flags,uint32_t mapFlags)4078 bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags)
4079 {
4080     if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
4081         return true;
4082 
4083     return false;
4084 }
4085 
testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> & result,int bufferSize)4086 void ModifyAfterWithMapBufferRangeCase::testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> &result,
4087                                                            int bufferSize)
4088 {
4089     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4090     const int subdataOffset  = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
4091     const int subdataSize    = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4 * 4);
4092     void *mapPtr;
4093 
4094     // map
4095     {
4096         uint64_t startTime;
4097         uint64_t endTime;
4098 
4099         startTime = deGetMicroseconds();
4100         mapPtr    = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
4101         endTime   = deGetMicroseconds();
4102 
4103         if (!mapPtr)
4104             throw tcu::TestError("mapBufferRange returned null");
4105 
4106         result.duration.mapDuration = endTime - startTime;
4107     }
4108 
4109     // write
4110     {
4111         result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
4112     }
4113 
4114     // unmap
4115     {
4116         uint64_t startTime;
4117         uint64_t endTime;
4118         glw::GLboolean unmapSucceeded;
4119 
4120         startTime      = deGetMicroseconds();
4121         unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
4122         endTime        = deGetMicroseconds();
4123 
4124         if (unmapSucceeded != GL_TRUE)
4125             throw UnmapFailureError();
4126 
4127         result.duration.unmapDuration = endTime - startTime;
4128     }
4129 
4130     result.duration.totalDuration =
4131         result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration;
4132     result.duration.fitResponseDuration = result.duration.totalDuration;
4133     result.writtenSize                  = subdataSize;
4134 }
4135 
4136 class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>
4137 {
4138 public:
4139     enum CaseFlags
4140     {
4141         FLAG_PARTIAL = 0x1,
4142     };
4143 
4144     ModifyAfterWithMapBufferFlushCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
4145                                       int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags);
4146     ~ModifyAfterWithMapBufferFlushCase(void);
4147 
4148     void init(void);
4149     void deinit(void);
4150 
4151 private:
4152     static bool isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags);
4153     void testWithBufferSize(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> &result, int bufferSize);
4154 
4155     const bool m_partialUpload;
4156     const uint32_t m_mapFlags;
4157 };
4158 
ModifyAfterWithMapBufferFlushCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,uint32_t usage,int flags,uint32_t glMapFlags)4159 ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase(Context &context, const char *name,
4160                                                                      const char *desc, int bufferSizeMin,
4161                                                                      int bufferSizeMax, uint32_t usage, int flags,
4162                                                                      uint32_t glMapFlags)
4163     : ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>(context, name, desc, bufferSizeMin, bufferSizeMax, usage,
4164                                                                isBufferUnspecifiedAfterUpload(flags, glMapFlags))
4165     , m_partialUpload((flags & FLAG_PARTIAL) != 0)
4166     , m_mapFlags(glMapFlags)
4167 {
4168 }
4169 
~ModifyAfterWithMapBufferFlushCase(void)4170 ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase(void)
4171 {
4172     deinit();
4173 }
4174 
init(void)4175 void ModifyAfterWithMapBufferFlushCase::init(void)
4176 {
4177     // Log the purpose of the test
4178 
4179     m_testCtx.getLog() << tcu::TestLog::Message
4180                        << "Testing performance of MapBufferRange() command after a draw command that sources data from "
4181                           "the target buffer.\n"
4182                        << ((m_partialUpload) ? ("Half of the buffer is mapped.\n") : ("Whole buffer is mapped.\n"))
4183                        << "Map bits:\n"
4184                        << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
4185                        << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
4186                        << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
4187                        << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
4188                        << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
4189                        << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
4190                        << tcu::TestLog::EndMessage;
4191 
4192     ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init();
4193 }
4194 
deinit(void)4195 void ModifyAfterWithMapBufferFlushCase::deinit(void)
4196 {
4197     ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit();
4198 }
4199 
isBufferUnspecifiedAfterUpload(int flags,uint32_t mapFlags)4200 bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags)
4201 {
4202     if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
4203         return true;
4204 
4205     return false;
4206 }
4207 
testWithBufferSize(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> & result,int bufferSize)4208 void ModifyAfterWithMapBufferFlushCase::testWithBufferSize(
4209     UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> &result, int bufferSize)
4210 {
4211     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4212     const int subdataOffset  = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
4213     const int subdataSize    = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4 * 4);
4214     void *mapPtr;
4215 
4216     // map
4217     {
4218         uint64_t startTime;
4219         uint64_t endTime;
4220 
4221         startTime = deGetMicroseconds();
4222         mapPtr    = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
4223         endTime   = deGetMicroseconds();
4224 
4225         if (!mapPtr)
4226             throw tcu::TestError("mapBufferRange returned null");
4227 
4228         result.duration.mapDuration = endTime - startTime;
4229     }
4230 
4231     // write
4232     {
4233         result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
4234     }
4235 
4236     // flush
4237     {
4238         uint64_t startTime;
4239         uint64_t endTime;
4240 
4241         startTime = deGetMicroseconds();
4242         gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize);
4243         endTime = deGetMicroseconds();
4244 
4245         result.duration.flushDuration = endTime - startTime;
4246     }
4247 
4248     // unmap
4249     {
4250         uint64_t startTime;
4251         uint64_t endTime;
4252         glw::GLboolean unmapSucceeded;
4253 
4254         startTime      = deGetMicroseconds();
4255         unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
4256         endTime        = deGetMicroseconds();
4257 
4258         if (unmapSucceeded != GL_TRUE)
4259             throw UnmapFailureError();
4260 
4261         result.duration.unmapDuration = endTime - startTime;
4262     }
4263 
4264     result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
4265                                     result.duration.unmapDuration + result.duration.flushDuration;
4266     result.duration.fitResponseDuration = result.duration.totalDuration;
4267     result.writtenSize                  = subdataSize;
4268 }
4269 
// Draw call used for rendering: drawArrays or drawElements.
enum DrawMethod
{
    DRAWMETHOD_DRAW_ARRAYS = 0,
    DRAWMETHOD_DRAW_ELEMENTS,

    DRAWMETHOD_LAST // number of draw methods, not a valid value
};

// Buffer selector: vertex or index buffer.
enum TargetBuffer
{
    TARGETBUFFER_VERTEX = 0,
    TARGETBUFFER_INDEX,

    TARGETBUFFER_LAST // number of target buffers, not a valid value
};

// Buffer state selector: new or existing buffer.
enum BufferState
{
    BUFFERSTATE_NEW = 0,
    BUFFERSTATE_EXISTING,

    BUFFERSTATE_LAST // number of buffer states, not a valid value
};

// Upload function selector.
enum UploadMethod
{
    UPLOADMETHOD_BUFFER_DATA = 0,
    UPLOADMETHOD_BUFFER_SUB_DATA,
    UPLOADMETHOD_MAP_BUFFER_RANGE,

    UPLOADMETHOD_LAST // number of upload methods, not a valid value
};

// Selector for an additional, unrelated buffer (none or a vertex buffer).
enum UnrelatedBufferType
{
    UNRELATEDBUFFERTYPE_NONE = 0,
    UNRELATEDBUFFERTYPE_VERTEX,

    UNRELATEDBUFFERTYPE_LAST // number of unrelated buffer types, not a valid value
};

// Upload range selector: full or partial.
enum UploadRange
{
    UPLOADRANGE_FULL = 0,
    UPLOADRANGE_PARTIAL,

    UPLOADRANGE_LAST // number of upload ranges, not a valid value
};
4318 
// Dimensions of a layered grid scene: gridWidth x gridHeight cells per layer,
// gridLayers layers.
struct LayeredGridSpec
{
    int gridWidth;
    int gridHeight;
    int gridLayers;
};

// Returns the number of vertices in the scene: six per cell (each cell is
// drawn as two separate triangles).
static int getLayeredGridNumVertices(const LayeredGridSpec &scene)
{
    const int verticesPerCell = 6;
    const int numCells        = scene.gridWidth * scene.gridHeight * scene.gridLayers;

    return numCells * verticesPerCell;
}
4330 
generateLayeredGridVertexAttribData4C4V(std::vector<tcu::Vec4> & vertexData,const LayeredGridSpec & scene)4331 static void generateLayeredGridVertexAttribData4C4V(std::vector<tcu::Vec4> &vertexData, const LayeredGridSpec &scene)
4332 {
4333     // interleave color & vertex data
4334     const tcu::Vec4 green(0.0f, 1.0f, 0.0f, 0.7f);
4335     const tcu::Vec4 yellow(1.0f, 1.0f, 0.0f, 0.8f);
4336 
4337     vertexData.resize(getLayeredGridNumVertices(scene) * 2);
4338 
4339     for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
4340         for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
4341             for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
4342             {
4343                 const tcu::Vec4 color  = (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow);
4344                 const float cellLeft   = (float(cellX) / (float)scene.gridWidth - 0.5f) * 2.0f;
4345                 const float cellRight  = (float(cellX + 1) / (float)scene.gridWidth - 0.5f) * 2.0f;
4346                 const float cellTop    = (float(cellY + 1) / (float)scene.gridHeight - 0.5f) * 2.0f;
4347                 const float cellBottom = (float(cellY) / (float)scene.gridHeight - 0.5f) * 2.0f;
4348 
4349                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 0] =
4350                     color;
4351                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 1] =
4352                     tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
4353 
4354                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 2] =
4355                     color;
4356                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 3] =
4357                     tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f);
4358 
4359                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 4] =
4360                     color;
4361                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 5] =
4362                     tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
4363 
4364                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 6] =
4365                     color;
4366                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 7] =
4367                     tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
4368 
4369                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 8] =
4370                     color;
4371                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 9] =
4372                     tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
4373 
4374                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] =
4375                     color;
4376                 vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] =
4377                     tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f);
4378             }
4379 }
4380 
generateLayeredGridIndexData(std::vector<uint32_t> & indexData,const LayeredGridSpec & scene)4381 static void generateLayeredGridIndexData(std::vector<uint32_t> &indexData, const LayeredGridSpec &scene)
4382 {
4383     indexData.resize(getLayeredGridNumVertices(scene) * 2);
4384 
4385     for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx)
4386         indexData[ndx] = ndx;
4387 }
4388 
4389 class RenderPerformanceTestBase : public TestCase
4390 {
4391 public:
4392     RenderPerformanceTestBase(Context &context, const char *name, const char *description);
4393     ~RenderPerformanceTestBase(void);
4394 
4395 protected:
4396     void init(void);
4397     void deinit(void);
4398 
4399     void waitGLResults(void) const;
4400     void setupVertexAttribs(void) const;
4401 
4402     enum
4403     {
4404         RENDER_AREA_SIZE = 128
4405     };
4406 
4407 private:
4408     glu::ShaderProgram *m_renderProgram;
4409     int m_colorLoc;
4410     int m_positionLoc;
4411 };
4412 
RenderPerformanceTestBase(Context & context,const char * name,const char * description)4413 RenderPerformanceTestBase::RenderPerformanceTestBase(Context &context, const char *name, const char *description)
4414     : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
4415     , m_renderProgram(DE_NULL)
4416     , m_colorLoc(0)
4417     , m_positionLoc(0)
4418 {
4419 }
4420 
~RenderPerformanceTestBase(void)4421 RenderPerformanceTestBase::~RenderPerformanceTestBase(void)
4422 {
4423     deinit();
4424 }
4425 
init(void)4426 void RenderPerformanceTestBase::init(void)
4427 {
4428     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4429 
4430     m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(),
4431                                              glu::ProgramSources() << glu::VertexSource(s_colorVertexShader)
4432                                                                    << glu::FragmentSource(s_colorFragmentShader));
4433     if (!m_renderProgram->isOk())
4434     {
4435         m_testCtx.getLog() << *m_renderProgram;
4436         throw tcu::TestError("could not build program");
4437     }
4438 
4439     m_colorLoc    = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color");
4440     m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");
4441 
4442     if (m_colorLoc == -1)
4443         throw tcu::TestError("Location of attribute a_color was -1");
4444     if (m_positionLoc == -1)
4445         throw tcu::TestError("Location of attribute a_position was -1");
4446 }
4447 
deinit(void)4448 void RenderPerformanceTestBase::deinit(void)
4449 {
4450     delete m_renderProgram;
4451     m_renderProgram = DE_NULL;
4452 }
4453 
setupVertexAttribs(void) const4454 void RenderPerformanceTestBase::setupVertexAttribs(void) const
4455 {
4456     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4457 
4458     // buffers are bound
4459 
4460     gl.enableVertexAttribArray(m_colorLoc);
4461     gl.enableVertexAttribArray(m_positionLoc);
4462 
4463     gl.vertexAttribPointer(m_colorLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)),
4464                            glu::BufferOffsetAsPointer(0 * sizeof(tcu::Vec4)));
4465     gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)),
4466                            glu::BufferOffsetAsPointer(1 * sizeof(tcu::Vec4)));
4467 
4468     gl.useProgram(m_renderProgram->getProgram());
4469 
4470     GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
4471 }
4472 
waitGLResults(void) const4473 void RenderPerformanceTestBase::waitGLResults(void) const
4474 {
4475     tcu::Surface unusedSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4476     glu::readPixels(m_context.getRenderContext(), 0, 0, unusedSurface.getAccess());
4477 }
4478 
4479 template <typename SampleType>
4480 class RenderCase : public RenderPerformanceTestBase
4481 {
4482 public:
4483     RenderCase(Context &context, const char *name, const char *description, DrawMethod drawMethod);
4484     ~RenderCase(void);
4485 
4486 protected:
4487     void init(void);
4488     void deinit(void);
4489 
4490 private:
4491     IterateResult iterate(void);
4492 
4493 protected:
4494     struct SampleResult
4495     {
4496         LayeredGridSpec scene;
4497         RenderSampleResult<SampleType> result;
4498     };
4499 
4500     int getMinWorkloadSize(void) const;
4501     int getMaxWorkloadSize(void) const;
4502     int getMinWorkloadDataSize(void) const;
4503     int getMaxWorkloadDataSize(void) const;
4504     int getVertexDataSize(void) const;
4505     int getNumSamples(void) const;
4506     void uploadScene(const LayeredGridSpec &scene);
4507 
4508     virtual void runSample(SampleResult &sample) = 0;
4509     virtual void logAndSetTestResult(const std::vector<SampleResult> &results);
4510 
4511     void mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> &dst,
4512                                       const std::vector<SampleResult> &src) const;
4513 
4514     const DrawMethod m_drawMethod;
4515 
4516 private:
4517     glw::GLuint m_attributeBufferID;
4518     glw::GLuint m_indexBufferID;
4519     int m_iterationNdx;
4520     std::vector<int> m_iterationOrder;
4521     std::vector<SampleResult> m_results;
4522     int m_numUnmapFailures;
4523 };
4524 
4525 template <typename SampleType>
RenderCase(Context & context,const char * name,const char * description,DrawMethod drawMethod)4526 RenderCase<SampleType>::RenderCase(Context &context, const char *name, const char *description, DrawMethod drawMethod)
4527     : RenderPerformanceTestBase(context, name, description)
4528     , m_drawMethod(drawMethod)
4529     , m_attributeBufferID(0)
4530     , m_indexBufferID(0)
4531     , m_iterationNdx(0)
4532     , m_numUnmapFailures(0)
4533 {
4534     DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
4535 }
4536 
4537 template <typename SampleType>
~RenderCase(void)4538 RenderCase<SampleType>::~RenderCase(void)
4539 {
4540     deinit();
4541 }
4542 
4543 template <typename SampleType>
init(void)4544 void RenderCase<SampleType>::init(void)
4545 {
4546     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4547 
4548     RenderPerformanceTestBase::init();
4549 
4550     // requirements
4551 
4552     if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
4553         m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
4554         throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" +
4555                                      de::toString<int>(RENDER_AREA_SIZE) + " render target");
4556 
4557     // gl state
4558 
4559     gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4560 
4561     // enable bleding to prevent grid layers from being discarded
4562     gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
4563     gl.blendEquation(GL_FUNC_ADD);
4564     gl.enable(GL_BLEND);
4565 
4566     // generate iterations
4567 
4568     {
4569         const int gridSizes[] = {20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80, 86, 92, 98, 104, 110, 116, 122, 128};
4570 
4571         for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
4572         {
4573             m_results.push_back(SampleResult());
4574 
4575             m_results.back().scene.gridHeight = gridSizes[gridNdx];
4576             m_results.back().scene.gridWidth  = gridSizes[gridNdx];
4577             m_results.back().scene.gridLayers = 5;
4578 
4579             m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene);
4580 
4581             // test cases set these, initialize to unused values
4582             m_results.back().result.renderDataSize    = -1;
4583             m_results.back().result.uploadedDataSize  = -1;
4584             m_results.back().result.unrelatedDataSize = -1;
4585         }
4586     }
4587 
4588     // randomize iteration order
4589     {
4590         m_iterationOrder.resize(m_results.size());
4591         generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size());
4592     }
4593 }
4594 
4595 template <typename SampleType>
deinit(void)4596 void RenderCase<SampleType>::deinit(void)
4597 {
4598     RenderPerformanceTestBase::deinit();
4599 
4600     if (m_attributeBufferID)
4601     {
4602         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID);
4603         m_attributeBufferID = 0;
4604     }
4605 
4606     if (m_indexBufferID)
4607     {
4608         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID);
4609         m_indexBufferID = 0;
4610     }
4611 }
4612 
4613 template <typename SampleType>
iterate(void)4614 typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate(void)
4615 {
4616     const int unmapFailureThreshold = 3;
4617     const int currentIteration      = m_iterationNdx;
4618     const int currentConfigNdx      = m_iterationOrder[currentIteration];
4619     SampleResult &currentSample     = m_results[currentConfigNdx];
4620 
4621     try
4622     {
4623         runSample(currentSample);
4624         ++m_iterationNdx;
4625     }
4626     catch (const UnmapFailureError &ex)
4627     {
4628         DE_UNREF(ex);
4629         ++m_numUnmapFailures;
4630     }
4631 
4632     if (m_numUnmapFailures > unmapFailureThreshold)
4633         throw tcu::TestError("Got too many unmap errors");
4634 
4635     if (m_iterationNdx < (int)m_iterationOrder.size())
4636         return CONTINUE;
4637 
4638     logAndSetTestResult(m_results);
4639     return STOP;
4640 }
4641 
4642 template <typename SampleType>
getMinWorkloadSize(void) const4643 int RenderCase<SampleType>::getMinWorkloadSize(void) const
4644 {
4645     int result = getLayeredGridNumVertices(m_results[0].scene);
4646 
4647     for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4648     {
4649         const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4650         result                 = de::min(result, workloadSize);
4651     }
4652 
4653     return result;
4654 }
4655 
4656 template <typename SampleType>
getMaxWorkloadSize(void) const4657 int RenderCase<SampleType>::getMaxWorkloadSize(void) const
4658 {
4659     int result = getLayeredGridNumVertices(m_results[0].scene);
4660 
4661     for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4662     {
4663         const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4664         result                 = de::max(result, workloadSize);
4665     }
4666 
4667     return result;
4668 }
4669 
4670 template <typename SampleType>
getMinWorkloadDataSize(void) const4671 int RenderCase<SampleType>::getMinWorkloadDataSize(void) const
4672 {
4673     return getMinWorkloadSize() * getVertexDataSize();
4674 }
4675 
4676 template <typename SampleType>
getMaxWorkloadDataSize(void) const4677 int RenderCase<SampleType>::getMaxWorkloadDataSize(void) const
4678 {
4679     return getMaxWorkloadSize() * getVertexDataSize();
4680 }
4681 
4682 template <typename SampleType>
getVertexDataSize(void) const4683 int RenderCase<SampleType>::getVertexDataSize(void) const
4684 {
4685     const int numVectors = 2;
4686     const int vec4Size   = 4 * sizeof(float);
4687 
4688     return numVectors * vec4Size;
4689 }
4690 
4691 template <typename SampleType>
getNumSamples(void) const4692 int RenderCase<SampleType>::getNumSamples(void) const
4693 {
4694     return (int)m_results.size();
4695 }
4696 
4697 template <typename SampleType>
uploadScene(const LayeredGridSpec & scene)4698 void RenderCase<SampleType>::uploadScene(const LayeredGridSpec &scene)
4699 {
4700     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4701 
4702     // vertex buffer
4703     {
4704         std::vector<tcu::Vec4> vertexData;
4705 
4706         generateLayeredGridVertexAttribData4C4V(vertexData, scene);
4707 
4708         if (m_attributeBufferID == 0)
4709             gl.genBuffers(1, &m_attributeBufferID);
4710         gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
4711         gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4712     }
4713 
4714     // index buffer
4715     if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4716     {
4717         std::vector<uint32_t> indexData;
4718 
4719         generateLayeredGridIndexData(indexData, scene);
4720 
4721         if (m_indexBufferID == 0)
4722             gl.genBuffers(1, &m_indexBufferID);
4723         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
4724         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
4725                       GL_STATIC_DRAW);
4726     }
4727 
4728     GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
4729 }
4730 
4731 template <typename SampleType>
logAndSetTestResult(const std::vector<SampleResult> & results)4732 void RenderCase<SampleType>::logAndSetTestResult(const std::vector<SampleResult> &results)
4733 {
4734     std::vector<RenderSampleResult<SampleType>> mappedResults;
4735 
4736     mapResultsToRenderRateFormat(mappedResults, results);
4737 
4738     {
4739         const RenderSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), mappedResults);
4740         const float rate                         = analysis.renderRateAtRange;
4741 
4742         if (rate == std::numeric_limits<float>::infinity())
4743         {
4744             // sample times are 1) invalid or 2) timer resolution too low
4745             m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
4746         }
4747         else
4748         {
4749             // report transfer rate in millions of MiB/s
4750             m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
4751         }
4752     }
4753 }
4754 
4755 template <typename SampleType>
mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> & dst,const std::vector<SampleResult> & src) const4756 void RenderCase<SampleType>::mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> &dst,
4757                                                           const std::vector<SampleResult> &src) const
4758 {
4759     dst.resize(src.size());
4760 
4761     for (int ndx = 0; ndx < (int)src.size(); ++ndx)
4762         dst[ndx] = src[ndx].result;
4763 }
4764 
4765 class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration>
4766 {
4767 public:
4768     ReferenceRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod drawMethod);
4769 
4770 private:
4771     void init(void);
4772     void runSample(SampleResult &sample);
4773 };
4774 
ReferenceRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod drawMethod)4775 ReferenceRenderTimeCase::ReferenceRenderTimeCase(Context &context, const char *name, const char *description,
4776                                                  DrawMethod drawMethod)
4777     : RenderCase<RenderReadDuration>(context, name, description, drawMethod)
4778 {
4779 }
4780 
init(void)4781 void ReferenceRenderTimeCase::init(void)
4782 {
4783     const char *const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4784 
4785     // init parent
4786     RenderCase<RenderReadDuration>::init();
4787 
4788     // log
4789     m_testCtx.getLog() << tcu::TestLog::Message << "Measuring the time used in " << targetFunctionName
4790                        << " and readPixels call with different rendering workloads.\n"
4791                        << getNumSamples() << " test samples. Sample order is randomized.\n"
4792                        << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4793                        << "Generated workload is multiple viewport-covering grids with varying number of cells, each "
4794                           "cell is two separate triangles.\n"
4795                        << "Workload sizes are in the range [" << getMinWorkloadSize() << ",  " << getMaxWorkloadSize()
4796                        << "] vertices ([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4797                        << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4798                        << "Test result is the approximated total processing rate in MiB / s.\n"
4799                        << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
4800                                ("Note that index array size is not included in the processed size.\n") :
4801                                (""))
4802                        << "Note! Test result should only be used as a baseline reference result for "
4803                           "buffer.data_upload.* test group results."
4804                        << tcu::TestLog::EndMessage;
4805 }
4806 
runSample(SampleResult & sample)4807 void ReferenceRenderTimeCase::runSample(SampleResult &sample)
4808 {
4809     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4810     tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4811     const int numVertices = getLayeredGridNumVertices(sample.scene);
4812     const glu::Buffer arrayBuffer(m_context.getRenderContext());
4813     const glu::Buffer indexBuffer(m_context.getRenderContext());
4814     std::vector<tcu::Vec4> vertexData;
4815     std::vector<uint32_t> indexData;
4816     uint64_t startTime;
4817     uint64_t endTime;
4818 
4819     // generate and upload buffers
4820 
4821     generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4822     gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4823     gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4824 
4825     if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4826     {
4827         generateLayeredGridIndexData(indexData, sample.scene);
4828         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4829         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
4830                       GL_STATIC_DRAW);
4831     }
4832 
4833     setupVertexAttribs();
4834 
4835     // make sure data is uploaded
4836 
4837     if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4838         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4839     else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4840         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4841     else
4842         DE_ASSERT(false);
4843     waitGLResults();
4844 
4845     gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4846     gl.clear(GL_COLOR_BUFFER_BIT);
4847     waitGLResults();
4848 
4849     tcu::warmupCPU();
4850 
4851     // Measure both draw and associated readpixels
4852     {
4853         startTime = deGetMicroseconds();
4854 
4855         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4856             gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4857         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4858             gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4859         else
4860             DE_ASSERT(false);
4861 
4862         endTime = deGetMicroseconds();
4863 
4864         sample.result.duration.renderDuration = endTime - startTime;
4865     }
4866 
4867     {
4868         startTime = deGetMicroseconds();
4869         glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4870         endTime = deGetMicroseconds();
4871 
4872         sample.result.duration.readDuration = endTime - startTime;
4873     }
4874 
4875     sample.result.renderDataSize    = getVertexDataSize() * sample.result.numVertices;
4876     sample.result.uploadedDataSize  = 0;
4877     sample.result.unrelatedDataSize = 0;
4878     sample.result.duration.renderReadDuration =
4879         sample.result.duration.renderDuration + sample.result.duration.readDuration;
4880     sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4881     sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4882 }
4883 
4884 class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration>
4885 {
4886 public:
4887     UnrelatedUploadRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod drawMethod,
4888                                   UploadMethod unrelatedUploadMethod);
4889 
4890 private:
4891     void init(void);
4892     void runSample(SampleResult &sample);
4893 
4894     const UploadMethod m_unrelatedUploadMethod;
4895 };
4896 
UnrelatedUploadRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod drawMethod,UploadMethod unrelatedUploadMethod)4897 UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase(Context &context, const char *name,
4898                                                              const char *description, DrawMethod drawMethod,
4899                                                              UploadMethod unrelatedUploadMethod)
4900     : RenderCase<UnrelatedUploadRenderReadDuration>(context, name, description, drawMethod)
4901     , m_unrelatedUploadMethod(unrelatedUploadMethod)
4902 {
4903     DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST);
4904 }
4905 
init(void)4906 void UnrelatedUploadRenderTimeCase::init(void)
4907 {
4908     const char *const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4909     tcu::MessageBuilder message(&m_testCtx.getLog());
4910 
4911     // init parent
4912     RenderCase<UnrelatedUploadRenderReadDuration>::init();
4913 
4914     // log
4915 
4916     message << "Measuring the time used in " << targetFunctionName
4917             << " and readPixels call with different rendering workloads.\n"
4918             << "Uploading an unrelated buffer just before issuing the rendering command with "
4919             << ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA)      ? ("bufferData") :
4920                 (m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA)  ? ("bufferSubData") :
4921                 (m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("mapBufferRange") :
4922                                                                              ((const char *)DE_NULL))
4923             << ".\n"
4924             << getNumSamples() << " test samples. Sample order is randomized.\n"
4925             << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4926             << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two "
4927                "separate triangles.\n"
4928             << "Workload sizes are in the range [" << getMinWorkloadSize() << ",  " << getMaxWorkloadSize()
4929             << "] vertices ([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4930             << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4931             << "Unrelated upload sizes are in the range [" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ", "
4932             << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n"
4933             << "Test result is the approximated total processing rate in MiB / s.\n"
4934             << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
4935                     ("Note that index array size is not included in the processed size.\n") :
4936                     (""))
4937             << "Note that the data size and the time used in the unrelated upload is not included in the results.\n"
4938             << "Note! Test result may not be useful as is but instead should be compared against the reference.* group "
4939                "and upload_and_draw.*_and_unrelated_upload group results.\n"
4940             << tcu::TestLog::EndMessage;
4941 }
4942 
runSample(SampleResult & sample)4943 void UnrelatedUploadRenderTimeCase::runSample(SampleResult &sample)
4944 {
4945     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
4946     tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4947     const int numVertices = getLayeredGridNumVertices(sample.scene);
4948     const glu::Buffer arrayBuffer(m_context.getRenderContext());
4949     const glu::Buffer indexBuffer(m_context.getRenderContext());
4950     const glu::Buffer unrelatedBuffer(m_context.getRenderContext());
4951     int unrelatedUploadSize = -1;
4952     int renderUploadSize;
4953     std::vector<tcu::Vec4> vertexData;
4954     std::vector<uint32_t> indexData;
4955     uint64_t startTime;
4956     uint64_t endTime;
4957 
4958     // generate and upload buffers
4959 
4960     generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4961     renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4962 
4963     gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4964     gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);
4965 
4966     if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4967     {
4968         generateLayeredGridIndexData(indexData, sample.scene);
4969         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4970         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
4971                       GL_STATIC_DRAW);
4972     }
4973 
4974     setupVertexAttribs();
4975 
4976     // make sure data is uploaded
4977 
4978     if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4979         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4980     else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4981         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4982     else
4983         DE_ASSERT(false);
4984     waitGLResults();
4985 
4986     gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4987     gl.clear(GL_COLOR_BUFFER_BIT);
4988     waitGLResults();
4989 
4990     tcu::warmupCPU();
4991 
4992     // Unrelated upload
4993     if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
4994     {
4995         unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4996 
4997         gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4998         gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
4999     }
5000     else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5001     {
5002         unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
5003 
5004         gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
5005         gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
5006         gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
5007     }
5008     else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5009     {
5010         void *mapPtr;
5011         glw::GLboolean unmapSuccessful;
5012 
5013         unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
5014 
5015         gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
5016         gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
5017 
5018         mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize,
5019                                    GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT |
5020                                        GL_MAP_UNSYNCHRONIZED_BIT);
5021         if (!mapPtr)
5022             throw tcu::Exception("MapBufferRange returned NULL");
5023 
5024         deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);
5025 
5026         // if unmapping fails, just try again later
5027         unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
5028         if (!unmapSuccessful)
5029             throw UnmapFailureError();
5030     }
5031     else
5032         DE_ASSERT(false);
5033 
5034     DE_ASSERT(unrelatedUploadSize != -1);
5035 
5036     // Measure both draw and associated readpixels
5037     {
5038         startTime = deGetMicroseconds();
5039 
5040         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5041             gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5042         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5043             gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5044         else
5045             DE_ASSERT(false);
5046 
5047         endTime = deGetMicroseconds();
5048 
5049         sample.result.duration.renderDuration = endTime - startTime;
5050     }
5051 
5052     {
5053         startTime = deGetMicroseconds();
5054         glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5055         endTime = deGetMicroseconds();
5056 
5057         sample.result.duration.readDuration = endTime - startTime;
5058     }
5059 
5060     sample.result.renderDataSize    = getVertexDataSize() * sample.result.numVertices;
5061     sample.result.uploadedDataSize  = renderUploadSize;
5062     sample.result.unrelatedDataSize = unrelatedUploadSize;
5063     sample.result.duration.renderReadDuration =
5064         sample.result.duration.renderDuration + sample.result.duration.readDuration;
5065     sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
5066     sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5067 }
5068 
5069 class ReferenceReadPixelsTimeCase : public TestCase
5070 {
5071 public:
5072     ReferenceReadPixelsTimeCase(Context &context, const char *name, const char *description);
5073 
5074 private:
5075     void init(void);
5076     IterateResult iterate(void);
5077     void logAndSetTestResult(void);
5078 
5079     enum
5080     {
5081         RENDER_AREA_SIZE = 128
5082     };
5083 
5084     const int m_numSamples;
5085     int m_sampleNdx;
5086     std::vector<int> m_samples;
5087 };
5088 
ReferenceReadPixelsTimeCase(Context & context,const char * name,const char * description)5089 ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase(Context &context, const char *name, const char *description)
5090     : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
5091     , m_numSamples(20)
5092     , m_sampleNdx(0)
5093     , m_samples(m_numSamples)
5094 {
5095 }
5096 
init(void)5097 void ReferenceReadPixelsTimeCase::init(void)
5098 {
5099     m_testCtx.getLog() << tcu::TestLog::Message << "Measuring the time used in a single readPixels call with "
5100                        << m_numSamples << " test samples.\n"
5101                        << "Test result is the median of the samples in microseconds.\n"
5102                        << "Note! Test result should only be used as a baseline reference result for "
5103                           "buffer.data_upload.* test group results."
5104                        << tcu::TestLog::EndMessage;
5105 }
5106 
iterate(void)5107 ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate(void)
5108 {
5109     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
5110     tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5111     uint64_t startTime;
5112     uint64_t endTime;
5113 
5114     deYield();
5115     tcu::warmupCPU();
5116     deYield();
5117 
5118     // "Render" something and wait for it
5119     gl.clearColor(0.0f, 1.0f, float(m_sampleNdx) / float(m_numSamples), 1.0f);
5120     gl.clear(GL_COLOR_BUFFER_BIT);
5121 
5122     // wait for results
5123     glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5124 
5125     // measure time used in readPixels
5126     startTime = deGetMicroseconds();
5127     glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5128     endTime = deGetMicroseconds();
5129 
5130     m_samples[m_sampleNdx] = (int)(endTime - startTime);
5131 
5132     if (++m_sampleNdx < m_numSamples)
5133         return CONTINUE;
5134 
5135     logAndSetTestResult();
5136     return STOP;
5137 }
5138 
logAndSetTestResult(void)5139 void ReferenceReadPixelsTimeCase::logAndSetTestResult(void)
5140 {
5141     // Log sample list
5142     {
5143         m_testCtx.getLog() << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
5144                            << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
5145                            << tcu::TestLog::EndSampleInfo;
5146 
5147         for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
5148             m_testCtx.getLog() << tcu::TestLog::Sample << m_samples[sampleNdx] << tcu::TestLog::EndSample;
5149 
5150         m_testCtx.getLog() << tcu::TestLog::EndSampleList;
5151     }
5152 
5153     // Log median
5154     {
5155         float median;
5156         float limit60Low;
5157         float limit60Up;
5158 
5159         std::sort(m_samples.begin(), m_samples.end());
5160         median     = linearSample(m_samples, 0.5f);
5161         limit60Low = linearSample(m_samples, 0.2f);
5162         limit60Up  = linearSample(m_samples, 0.8f);
5163 
5164         m_testCtx.getLog() << tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median)
5165                            << tcu::TestLog::Message << "60 % of samples within range:\n"
5166                            << tcu::TestLog::EndMessage
5167                            << tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
5168                            << tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);
5169 
5170         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
5171     }
5172 }
5173 
5174 template <typename SampleType>
5175 class GenericUploadRenderTimeCase : public RenderCase<SampleType>
5176 {
5177 public:
5178     typedef typename RenderCase<SampleType>::SampleResult SampleResult;
5179 
5180     GenericUploadRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod method,
5181                                 TargetBuffer targetBuffer, UploadMethod uploadMethod, BufferState bufferState,
5182                                 UploadRange uploadRange, UnrelatedBufferType unrelatedBufferType);
5183 
5184 private:
5185     void init(void);
5186     void runSample(SampleResult &sample);
5187 
5188     using RenderCase<SampleType>::RENDER_AREA_SIZE;
5189 
5190     const TargetBuffer m_targetBuffer;
5191     const BufferState m_bufferState;
5192     const UploadMethod m_uploadMethod;
5193     const UnrelatedBufferType m_unrelatedBufferType;
5194     const UploadRange m_uploadRange;
5195 
5196     using RenderCase<SampleType>::m_context;
5197     using RenderCase<SampleType>::m_testCtx;
5198     using RenderCase<SampleType>::m_drawMethod;
5199 };
5200 
5201 template <typename SampleType>
GenericUploadRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod method,TargetBuffer targetBuffer,UploadMethod uploadMethod,BufferState bufferState,UploadRange uploadRange,UnrelatedBufferType unrelatedBufferType)5202 GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase(Context &context, const char *name,
5203                                                                      const char *description, DrawMethod method,
5204                                                                      TargetBuffer targetBuffer,
5205                                                                      UploadMethod uploadMethod, BufferState bufferState,
5206                                                                      UploadRange uploadRange,
5207                                                                      UnrelatedBufferType unrelatedBufferType)
5208     : RenderCase<SampleType>(context, name, description, method)
5209     , m_targetBuffer(targetBuffer)
5210     , m_bufferState(bufferState)
5211     , m_uploadMethod(uploadMethod)
5212     , m_unrelatedBufferType(unrelatedBufferType)
5213     , m_uploadRange(uploadRange)
5214 {
5215     DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
5216     DE_ASSERT(m_bufferState < BUFFERSTATE_LAST);
5217     DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
5218     DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
5219     DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
5220 }
5221 
5222 template <typename SampleType>
init(void)5223 void GenericUploadRenderTimeCase<SampleType>::init(void)
5224 {
5225     // init parent
5226     RenderCase<SampleType>::init();
5227 
5228     // log
5229     {
5230         const char *const targetFunctionName =
5231             (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
5232         const int perVertexSize =
5233             (m_targetBuffer == TARGETBUFFER_INDEX) ? ((int)sizeof(uint32_t)) : ((int)sizeof(tcu::Vec4[2]));
5234         const int fullMinUploadSize = RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize;
5235         const int fullMaxUploadSize = RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize;
5236         const int minUploadSize =
5237             (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize / 2, 4));
5238         const int maxUploadSize =
5239             (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize / 2, 4));
5240         const int minUnrelatedUploadSize = RenderCase<SampleType>::getMinWorkloadSize() * (int)sizeof(tcu::Vec4[2]);
5241         const int maxUnrelatedUploadSize = RenderCase<SampleType>::getMaxWorkloadSize() * (int)sizeof(tcu::Vec4[2]);
5242 
5243         m_testCtx.getLog()
5244             << tcu::TestLog::Message << "Measuring the time used in " << targetFunctionName
5245             << " and readPixels call with different rendering workloads.\n"
5246             << "The " << ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib")) << " buffer "
5247             << ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents ")) << "sourced by the rendering command "
5248             << ((m_bufferState == BUFFERSTATE_NEW)     ? ("is uploaded ") :
5249                 (m_uploadRange == UPLOADRANGE_FULL)    ? ("are specified ") :
5250                 (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("are updated (partial upload) ") :
5251                                                          ((const char *)DE_NULL))
5252             << "just before issuing the rendering command.\n"
5253             << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") :
5254                                                           ("The buffer is generated just before uploading.\n"))
5255             << "Buffer "
5256             << ((m_bufferState == BUFFERSTATE_NEW)     ? ("is uploaded") :
5257                 (m_uploadRange == UPLOADRANGE_FULL)    ? ("contents are specified") :
5258                 (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("contents are partially updated") :
5259                                                          ((const char *)DE_NULL))
5260             << " with "
5261             << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)     ? ("bufferData") :
5262                 (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") :
5263                                                                    ("mapBufferRange"))
5264             << " command. Usage of the target buffer is DYNAMIC_DRAW.\n"
5265             << ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ?
5266                     ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | "
5267                      "MAP_UNSYNCHRONIZED_BIT\n") :
5268                     (""))
5269             << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ?
5270                     ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") :
5271                     (""))
5272             << RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n"
5273             << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
5274             << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two "
5275                "separate triangles.\n"
5276             << "Workload sizes are in the range [" << RenderCase<SampleType>::getMinWorkloadSize() << ",  "
5277             << RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices "
5278             << "([" << getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << ","
5279             << getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n"
5280             << "Upload sizes are in the range [" << getHumanReadableByteSize(minUploadSize) << ","
5281             << getHumanReadableByteSize(maxUploadSize) << "].\n"
5282             << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
5283                     ("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) +
5284                      ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") :
5285                     (""))
5286             << "Test result is the approximated processing rate in MiB / s.\n"
5287             << "Note that while upload time is measured, the time used is not included in the results.\n"
5288             << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ?
5289                     ("Note that the data size and the time used in the unrelated upload is not included in the "
5290                      "results.\n") :
5291                     (""))
5292             << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
5293                     ("Note that index array size is not included in the processed size.\n") :
5294                     (""))
5295             << "Note! Test result may not be useful as is but instead should be compared against the reference.* group "
5296                "and other upload_and_draw.* group results.\n"
5297             << tcu::TestLog::EndMessage;
5298     }
5299 }
5300 
5301 template <typename SampleType>
runSample(SampleResult & sample)5302 void GenericUploadRenderTimeCase<SampleType>::runSample(SampleResult &sample)
5303 {
5304     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
5305     const glu::Buffer arrayBuffer(m_context.getRenderContext());
5306     const glu::Buffer indexBuffer(m_context.getRenderContext());
5307     const glu::Buffer unrelatedBuffer(m_context.getRenderContext());
5308     const int numVertices = getLayeredGridNumVertices(sample.scene);
5309     tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5310     uint64_t startTime;
5311     uint64_t endTime;
5312     std::vector<tcu::Vec4> vertexData;
5313     std::vector<uint32_t> indexData;
5314 
5315     // create data
5316 
5317     generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
5318     if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5319         generateLayeredGridIndexData(indexData, sample.scene);
5320 
5321     gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5322     gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5323     RenderCase<SampleType>::setupVertexAttribs();
5324 
5325     // target should be an exisiting buffer? Draw from it once to make sure it exists on the gpu
5326 
5327     if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING)
5328     {
5329         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5330                       GL_DYNAMIC_DRAW);
5331         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5332     }
5333     else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW)
5334     {
5335         // do not touch the vertex buffer
5336     }
5337     else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING)
5338     {
5339         // hint that the target buffer will be modified soon
5340         const glw::GLenum vertexDataUsage =
5341             (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
5342         const glw::GLenum indexDataUsage =
5343             (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
5344 
5345         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5346                       vertexDataUsage);
5347         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0],
5348                       indexDataUsage);
5349         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5350     }
5351     else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW)
5352     {
5353         if (m_targetBuffer == TARGETBUFFER_VERTEX)
5354         {
5355             // make the index buffer present on the gpu
5356             // use another vertex buffer to keep original buffer in unused state
5357             const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
5358 
5359             gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
5360             RenderCase<SampleType>::setupVertexAttribs();
5361 
5362             gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5363                           GL_STATIC_DRAW);
5364             gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)),
5365                           &indexData[0], GL_STATIC_DRAW);
5366             gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5367 
5368             // restore original state
5369             gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5370             RenderCase<SampleType>::setupVertexAttribs();
5371         }
5372         else if (m_targetBuffer == TARGETBUFFER_INDEX)
5373         {
5374             // make the vertex buffer present on the gpu
5375             gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5376                           GL_STATIC_DRAW);
5377             gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5378         }
5379         else
5380             DE_ASSERT(false);
5381     }
5382     else
5383         DE_ASSERT(false);
5384 
5385     RenderCase<SampleType>::waitGLResults();
5386     GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
5387 
5388     gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
5389     gl.clear(GL_COLOR_BUFFER_BIT);
5390     RenderCase<SampleType>::waitGLResults();
5391 
5392     tcu::warmupCPU();
5393 
5394     // upload
5395 
5396     {
5397         glw::GLenum target;
5398         glw::GLsizeiptr size;
5399         glw::GLintptr offset = 0;
5400         const void *source;
5401 
5402         if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
5403         {
5404             target = GL_ARRAY_BUFFER;
5405             size   = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
5406             source = &vertexData[0];
5407         }
5408         else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
5409         {
5410             target = GL_ELEMENT_ARRAY_BUFFER;
5411             size   = (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t));
5412             source = &indexData[0];
5413         }
5414         else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5415         {
5416             DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
5417 
5418             target = GL_ARRAY_BUFFER;
5419             size   = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
5420             offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
5421             source = (const uint8_t *)&vertexData[0] + offset;
5422         }
5423         else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5424         {
5425             DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
5426 
5427             // upload to 25% - 75% range
5428             target = GL_ELEMENT_ARRAY_BUFFER;
5429             size   = (glw::GLsizeiptr)deAlign32((int32_t)(indexData.size() * sizeof(uint32_t)) / 2, 4);
5430             offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
5431             source = (const uint8_t *)&indexData[0] + offset;
5432         }
5433         else
5434         {
5435             DE_ASSERT(false);
5436             return;
5437         }
5438 
5439         startTime = deGetMicroseconds();
5440 
5441         if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5442             gl.bufferData(target, size, source, GL_DYNAMIC_DRAW);
5443         else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5444         {
5445             // create buffer storage
5446             if (m_bufferState == BUFFERSTATE_NEW)
5447                 gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
5448             gl.bufferSubData(target, offset, size, source);
5449         }
5450         else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5451         {
5452             void *mapPtr;
5453             glw::GLboolean unmapSuccessful;
5454 
5455             // create buffer storage
5456             if (m_bufferState == BUFFERSTATE_NEW)
5457                 gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
5458 
5459             mapPtr = gl.mapBufferRange(target, offset, size,
5460                                        GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT |
5461                                            GL_MAP_UNSYNCHRONIZED_BIT);
5462             if (!mapPtr)
5463                 throw tcu::Exception("MapBufferRange returned NULL");
5464 
5465             deMemcpy(mapPtr, source, (int)size);
5466 
5467             // if unmapping fails, just try again later
5468             unmapSuccessful = gl.unmapBuffer(target);
5469             if (!unmapSuccessful)
5470                 throw UnmapFailureError();
5471         }
5472         else
5473             DE_ASSERT(false);
5474 
5475         endTime = deGetMicroseconds();
5476 
5477         sample.result.uploadedDataSize        = (int)size;
5478         sample.result.duration.uploadDuration = endTime - startTime;
5479     }
5480 
5481     // unrelated
5482     if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX)
5483     {
5484         const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
5485 
5486         gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
5487         gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
5488         // Attibute pointers are not modified, no need restore state
5489 
5490         sample.result.unrelatedDataSize = unrelatedUploadSize;
5491     }
5492 
5493     // draw
5494     {
5495         startTime = deGetMicroseconds();
5496 
5497         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5498             gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5499         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5500             gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5501         else
5502             DE_ASSERT(false);
5503 
5504         endTime = deGetMicroseconds();
5505 
5506         sample.result.duration.renderDuration = endTime - startTime;
5507     }
5508 
5509     // read
5510     {
5511         startTime = deGetMicroseconds();
5512         glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5513         endTime = deGetMicroseconds();
5514 
5515         sample.result.duration.readDuration = endTime - startTime;
5516     }
5517 
5518     // set results
5519 
5520     sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices;
5521 
5522     sample.result.duration.renderReadDuration =
5523         sample.result.duration.renderDuration + sample.result.duration.readDuration;
5524     sample.result.duration.totalDuration = sample.result.duration.uploadDuration +
5525                                            sample.result.duration.renderDuration + sample.result.duration.readDuration;
5526     sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5527 }
5528 
5529 class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration>
5530 {
5531 public:
5532     enum MapFlags
5533     {
5534         MAPFLAG_NONE = 0,
5535         MAPFLAG_INVALIDATE_BUFFER,
5536         MAPFLAG_INVALIDATE_RANGE,
5537 
5538         MAPFLAG_LAST
5539     };
5540     enum UploadBufferTarget
5541     {
5542         UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0,
5543         UPLOADBUFFERTARGET_SAME_BUFFER,
5544 
5545         UPLOADBUFFERTARGET_LAST
5546     };
5547     BufferInUseRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod method,
5548                               MapFlags mapFlags, TargetBuffer targetBuffer, UploadMethod uploadMethod,
5549                               UploadRange uploadRange, UploadBufferTarget uploadTarget);
5550 
5551 private:
5552     void init(void);
5553     void runSample(SampleResult &sample);
5554 
5555     const TargetBuffer m_targetBuffer;
5556     const UploadMethod m_uploadMethod;
5557     const UploadRange m_uploadRange;
5558     const MapFlags m_mapFlags;
5559     const UploadBufferTarget m_uploadBufferTarget;
5560 };
5561 
BufferInUseRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod method,MapFlags mapFlags,TargetBuffer targetBuffer,UploadMethod uploadMethod,UploadRange uploadRange,UploadBufferTarget uploadTarget)5562 BufferInUseRenderTimeCase::BufferInUseRenderTimeCase(Context &context, const char *name, const char *description,
5563                                                      DrawMethod method, MapFlags mapFlags, TargetBuffer targetBuffer,
5564                                                      UploadMethod uploadMethod, UploadRange uploadRange,
5565                                                      UploadBufferTarget uploadTarget)
5566     : RenderCase<RenderUploadRenderReadDuration>(context, name, description, method)
5567     , m_targetBuffer(targetBuffer)
5568     , m_uploadMethod(uploadMethod)
5569     , m_uploadRange(uploadRange)
5570     , m_mapFlags(mapFlags)
5571     , m_uploadBufferTarget(uploadTarget)
5572 {
5573     DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
5574     DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
5575     DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
5576     DE_ASSERT(m_mapFlags < MAPFLAG_LAST);
5577     DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST);
5578 }
5579 
init(void)5580 void BufferInUseRenderTimeCase::init(void)
5581 {
5582     RenderCase<RenderUploadRenderReadDuration>::init();
5583 
5584     // log
5585     {
5586         const char *const targetFunctionName =
5587             (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
5588         const char *const uploadFunctionName = (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)     ? ("bufferData") :
5589                                                (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") :
5590                                                                                                   ("mapBufferRange");
5591         const bool isReferenceCase           = (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER);
5592         tcu::MessageBuilder message(&m_testCtx.getLog());
5593 
5594         message << "Measuring the time used in " << targetFunctionName << " call, a buffer upload, "
5595                 << targetFunctionName
5596                 << " call using the uploaded buffer and readPixels call with different upload sizes.\n";
5597 
5598         if (isReferenceCase)
5599             message << "Rendering:\n"
5600                     << "    before test: create and use buffers B and C\n"
5601                     << "    first draw: render using buffer B\n"
5602                     << ((m_uploadRange == UPLOADRANGE_FULL)    ? ("    upload: respecify buffer C contents\n") :
5603                         (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("    upload: modify buffer C contents\n") :
5604                                                                  ((const char *)DE_NULL))
5605                     << "    second draw: render using buffer C\n"
5606                     << "    read: readPixels\n";
5607         else
5608             message << "Rendering:\n"
5609                     << "    before test: create and use buffer B\n"
5610                     << "    first draw: render using buffer B\n"
5611                     << ((m_uploadRange == UPLOADRANGE_FULL)    ? ("    upload: respecify buffer B contents\n") :
5612                         (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("    upload: modify buffer B contents\n") :
5613                                                                  ((const char *)DE_NULL))
5614                     << "    second draw: render using buffer B\n"
5615                     << "    read: readPixels\n";
5616 
5617         message << "Uploading using " << uploadFunctionName
5618                 << ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE) ? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT") :
5619                     (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ?
5620                                                                (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT") :
5621                     (m_mapFlags == MAPFLAG_NONE) ? ("") :
5622                                                    ((const char *)DE_NULL))
5623                 << "\n"
5624                 << getNumSamples() << " test samples. Sample order is randomized.\n"
5625                 << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
5626                 << "Workload sizes are in the range [" << getMinWorkloadSize() << ",  " << getMaxWorkloadSize()
5627                 << "] vertices "
5628                 << "([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
5629                 << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
5630                 << "Test result is the approximated processing rate in MiB / s of the second draw call and the "
5631                    "readPixels call.\n";
5632 
5633         if (isReferenceCase)
5634             message << "Note! Test result should only be used as a baseline reference result for "
5635                        "buffer.render_after_upload.draw_modify_draw test group results.";
5636         else
5637             message << "Note! Test result may not be useful as is but instead should be compared against the "
5638                        "buffer.render_after_upload.reference.draw_upload_draw group results.\n";
5639 
5640         message << tcu::TestLog::EndMessage;
5641     }
5642 }
5643 
// Runs one measurement sample: prepares the buffers, then times (with
// deGetMicroseconds) a first draw, a buffer upload, a second draw and a
// readPixels call, and stores the durations and data sizes in sample.result.
//
// In the reference configuration (UPLOADBUFFERTARGET_DIFFERENT_BUFFER) the
// upload goes to alternativeUploadBuffer and the second draw sources from it,
// so the buffer used by the first draw is never modified.
//
// Throws tcu::Exception if mapBufferRange returns NULL and UnmapFailureError
// if unmapBuffer reports failure.
runSample(SampleResult & sample)5644 void BufferInUseRenderTimeCase::runSample(SampleResult &sample)
5645 {
5646     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
5647     const glu::Buffer arrayBuffer(m_context.getRenderContext());
5648     const glu::Buffer indexBuffer(m_context.getRenderContext());
5649     const glu::Buffer alternativeUploadBuffer(m_context.getRenderContext());
5650     const int numVertices = getLayeredGridNumVertices(sample.scene);
5651     tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5652     uint64_t startTime;
5653     uint64_t endTime;
5654     std::vector<tcu::Vec4> vertexData;
5655     std::vector<uint32_t> indexData;
5656 
5657     // create data
5658 
         // index data is generated only for indexed drawing; indexData stays empty otherwise
5659     generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
5660     if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5661         generateLayeredGridIndexData(indexData, sample.scene);
5662 
5663     // make buffers used
5664 
5665     gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5666     gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5667     setupVertexAttribs();
5668 
         // untimed: fill the buffers and draw from them once before the measurement starts
5669     if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5670     {
5671         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5672                       GL_STREAM_DRAW);
5673         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5674     }
5675     else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5676     {
5677         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5678                       GL_STREAM_DRAW);
5679         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0],
5680                       GL_STREAM_DRAW);
5681         gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5682     }
5683     else
5684         DE_ASSERT(false);
5685 
5686     // another pair of buffers for reference case
5687     if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5688     {
5689         if (m_targetBuffer == TARGETBUFFER_VERTEX)
5690         {
5691             gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer);
5692             gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
5693                           GL_STREAM_DRAW);
5694 
                 // note: this warm-up draw uses drawArrays regardless of m_drawMethod
5695             setupVertexAttribs();
5696             gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5697         }
5698         else if (m_targetBuffer == TARGETBUFFER_INDEX)
5699         {
5700             gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer);
5701             gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)),
5702                           &indexData[0], GL_STREAM_DRAW);
5703             gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5704         }
5705         else
5706             DE_ASSERT(false);
5707 
5708         // restore state
5709         gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5710         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5711         setupVertexAttribs();
5712     }
5713 
5714     waitGLResults();
5715     GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
5716 
5717     gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
5718     gl.clear(GL_COLOR_BUFFER_BIT);
5719     waitGLResults();
5720 
5721     tcu::warmupCPU();
5722 
5723     // first draw
5724     {
5725         startTime = deGetMicroseconds();
5726 
5727         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5728             gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5729         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5730             gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5731         else
5732             DE_ASSERT(false);
5733 
5734         endTime = deGetMicroseconds();
5735 
5736         sample.result.duration.firstRenderDuration = endTime - startTime;
5737     }
5738 
5739     // upload
5740     {
5741         glw::GLenum target;
5742         glw::GLsizeiptr size;
5743         glw::GLintptr offset = 0;
5744         const void *source;
5745 
             // select bind target, byte range and source pointer for the configured buffer and range
5746         if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
5747         {
5748             target = GL_ARRAY_BUFFER;
5749             size   = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
5750             source = &vertexData[0];
5751         }
5752         else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
5753         {
5754             target = GL_ELEMENT_ARRAY_BUFFER;
5755             size   = (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t));
5756             source = &indexData[0];
5757         }
5758         else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5759         {
                 // half of the data, starting a quarter of the way in; size and offset are aligned to 4 bytes
5760             target = GL_ARRAY_BUFFER;
5761             size   = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
5762             offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
5763             source = (const uint8_t *)&vertexData[0] + offset;
5764         }
5765         else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5766         {
5767             // upload to 25% - 75% range
5768             target = GL_ELEMENT_ARRAY_BUFFER;
5769             size   = (glw::GLsizeiptr)deAlign32((int32_t)(indexData.size() * sizeof(uint32_t)) / 2, 4);
5770             offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
5771             source = (const uint8_t *)&indexData[0] + offset;
5772         }
5773         else
5774         {
5775             DE_ASSERT(false);
5776             return;
5777         }
5778 
5779         // reference case? don't modify the buffer in use
             // (the rebind happens before the clock starts, so it is not part of uploadDuration)
5780         if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5781             gl.bindBuffer(target, *alternativeUploadBuffer);
5782 
5783         startTime = deGetMicroseconds();
5784 
             // note: bufferData ignores 'offset' and respecifies the store with 'size' bytes from 'source'
5785         if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5786             gl.bufferData(target, size, source, GL_STREAM_DRAW);
5787         else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5788             gl.bufferSubData(target, offset, size, source);
5789         else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5790         {
                 // NOTE(review): any other m_mapFlags value (e.g. MAPFLAG_NONE) yields -1 here, which is
                 // passed to mapBufferRange as is; presumably that combination is never instantiated - confirm
5791             const int mapFlags =
5792                 (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT) :
5793                 (m_mapFlags == MAPFLAG_INVALIDATE_RANGE)  ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT) :
5794                                                             (-1);
5795             void *mapPtr;
5796             glw::GLboolean unmapSuccessful;
5797 
5798             mapPtr = gl.mapBufferRange(target, offset, size, mapFlags);
5799             if (!mapPtr)
5800                 throw tcu::Exception("MapBufferRange returned NULL");
5801 
5802             deMemcpy(mapPtr, source, (int)size);
5803 
5804             // if unmapping fails, just try again later
5805             unmapSuccessful = gl.unmapBuffer(target);
5806             if (!unmapSuccessful)
5807                 throw UnmapFailureError();
5808         }
5809         else
5810             DE_ASSERT(false);
5811 
5812         endTime = deGetMicroseconds();
5813 
5814         sample.result.uploadedDataSize        = (int)size;
5815         sample.result.duration.uploadDuration = endTime - startTime;
5816     }
5817 
5818     // second draw
5819     {
5820         // Source vertex data from alternative buffer in reference case
             // (GL_ARRAY_BUFFER was rebound to it in the upload step; done before the clock starts)
5821         if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX)
5822             setupVertexAttribs();
5823 
5824         startTime = deGetMicroseconds();
5825 
5826         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5827             gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5828         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5829             gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5830         else
5831             DE_ASSERT(false);
5832 
5833         endTime = deGetMicroseconds();
5834 
5835         sample.result.duration.secondRenderDuration = endTime - startTime;
5836     }
5837 
5838     // read
5839     {
5840         startTime = deGetMicroseconds();
5841         glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5842         endTime = deGetMicroseconds();
5843 
5844         sample.result.duration.readDuration = endTime - startTime;
5845     }
5846 
5847     // set results
5848 
5849     sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
5850 
         // renderReadDuration covers only the second draw and the read; totalDuration sums all four
         // timed steps. The fit response is the second-draw + read time.
5851     sample.result.duration.renderReadDuration =
5852         sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
5853     sample.result.duration.totalDuration =
5854         sample.result.duration.firstRenderDuration + sample.result.duration.uploadDuration +
5855         sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
5856     sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5857 }
5858 
5859 class UploadWaitDrawCase : public RenderPerformanceTestBase
5860 {
5861 public:
5862     struct Sample
5863     {
5864         int numFrames;
5865         uint64_t uploadCallEndTime;
5866     };
5867     struct Result
5868     {
5869         uint64_t uploadDuration;
5870         uint64_t renderDuration;
5871         uint64_t readDuration;
5872         uint64_t renderReadDuration;
5873 
5874         uint64_t timeBeforeUse;
5875     };
5876 
5877     UploadWaitDrawCase(Context &context, const char *name, const char *description, DrawMethod drawMethod,
5878                        TargetBuffer targetBuffer, UploadMethod uploadMethod, BufferState bufferState);
5879     ~UploadWaitDrawCase(void);
5880 
5881 private:
5882     void init(void);
5883     void deinit(void);
5884     IterateResult iterate(void);
5885 
5886     void uploadBuffer(Sample &sample, Result &result);
5887     void drawFromBuffer(Sample &sample, Result &result);
5888     void reuseAndDeleteBuffer(void);
5889     void logAndSetTestResult(void);
5890     void logSamples(void);
5891     void drawMisc(void);
5892     int findStabilizationSample(uint64_t Result::*target, const char *description);
5893     bool checkSampleTemporalStability(uint64_t Result::*target, const char *description);
5894 
5895     const DrawMethod m_drawMethod;
5896     const TargetBuffer m_targetBuffer;
5897     const UploadMethod m_uploadMethod;
5898     const BufferState m_bufferState;
5899 
5900     const int m_numSamplesPerSwap;
5901     const int m_numMaxSwaps;
5902 
5903     int m_frameNdx;
5904     int m_sampleNdx;
5905     int m_numVertices;
5906 
5907     std::vector<tcu::Vec4> m_vertexData;
5908     std::vector<uint32_t> m_indexData;
5909     std::vector<Sample> m_samples;
5910     std::vector<Result> m_results;
5911     std::vector<int> m_iterationOrder;
5912 
5913     uint32_t m_vertexBuffer;
5914     uint32_t m_indexBuffer;
5915     uint32_t m_miscBuffer;
5916     int m_numMiscVertices;
5917 };
5918 
UploadWaitDrawCase(Context & context,const char * name,const char * description,DrawMethod drawMethod,TargetBuffer targetBuffer,UploadMethod uploadMethod,BufferState bufferState)5919 UploadWaitDrawCase::UploadWaitDrawCase(Context &context, const char *name, const char *description,
5920                                        DrawMethod drawMethod, TargetBuffer targetBuffer, UploadMethod uploadMethod,
5921                                        BufferState bufferState)
5922     : RenderPerformanceTestBase(context, name, description)
5923     , m_drawMethod(drawMethod)
5924     , m_targetBuffer(targetBuffer)
5925     , m_uploadMethod(uploadMethod)
5926     , m_bufferState(bufferState)
5927     , m_numSamplesPerSwap(10)
5928     , m_numMaxSwaps(4)
5929     , m_frameNdx(0)
5930     , m_sampleNdx(0)
5931     , m_numVertices(-1)
5932     , m_vertexBuffer(0)
5933     , m_indexBuffer(0)
5934     , m_miscBuffer(0)
5935     , m_numMiscVertices(-1)
5936 {
5937 }
5938 
~UploadWaitDrawCase(void)5939 UploadWaitDrawCase::~UploadWaitDrawCase(void)
5940 {
5941     deinit();
5942 }
5943 
init(void)5944 void UploadWaitDrawCase::init(void)
5945 {
5946     const glw::Functions &gl       = m_context.getRenderContext().getFunctions();
5947     const int vertexAttribSize     = (int)sizeof(tcu::Vec4) * 2; // color4, position4
5948     const int vertexIndexSize      = (int)sizeof(uint32_t);
5949     const int vertexUploadDataSize = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize);
5950 
5951     RenderPerformanceTestBase::init();
5952 
5953     // requirements
5954 
5955     if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
5956         m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
5957         throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" +
5958                                      de::toString<int>(RENDER_AREA_SIZE) + " render target");
5959 
5960     // gl state
5961 
5962     gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5963 
5964     // enable bleding to prevent grid layers from being discarded
5965 
5966     gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
5967     gl.blendEquation(GL_FUNC_ADD);
5968     gl.enable(GL_BLEND);
5969 
5970     // scene
5971 
5972     {
5973         LayeredGridSpec scene;
5974 
5975         // create ~8MB workload with similar characteristics as in the other test
5976         // => makes comparison to other results more straightforward
5977         scene.gridWidth  = 93;
5978         scene.gridHeight = 93;
5979         scene.gridLayers = 5;
5980 
5981         generateLayeredGridVertexAttribData4C4V(m_vertexData, scene);
5982         generateLayeredGridIndexData(m_indexData, scene);
5983         m_numVertices = getLayeredGridNumVertices(scene);
5984     }
5985 
5986     // buffers
5987 
5988     if (m_bufferState == BUFFERSTATE_NEW)
5989     {
5990         if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5991         {
5992             // reads from two buffers, prepare the static buffer
5993 
5994             if (m_targetBuffer == TARGETBUFFER_VERTEX)
5995             {
5996                 // index buffer is static, use another vertex buffer to keep original buffer in unused state
5997                 const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
5998 
5999                 gl.genBuffers(1, &m_indexBuffer);
6000                 gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
6001                 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6002                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)),
6003                               &m_vertexData[0], GL_STATIC_DRAW);
6004                 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t)),
6005                               &m_indexData[0], GL_STATIC_DRAW);
6006 
6007                 setupVertexAttribs();
6008                 gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
6009             }
6010             else if (m_targetBuffer == TARGETBUFFER_INDEX)
6011             {
6012                 // vertex buffer is static
6013                 gl.genBuffers(1, &m_vertexBuffer);
6014                 gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6015                 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)),
6016                               &m_vertexData[0], GL_STATIC_DRAW);
6017 
6018                 setupVertexAttribs();
6019                 gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
6020             }
6021             else
6022                 DE_ASSERT(false);
6023         }
6024     }
6025     else if (m_bufferState == BUFFERSTATE_EXISTING)
6026     {
6027         const glw::GLenum vertexUsage = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
6028         const glw::GLenum indexUsage  = (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
6029 
6030         gl.genBuffers(1, &m_vertexBuffer);
6031         gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6032         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0],
6033                       vertexUsage);
6034 
6035         if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6036         {
6037             gl.genBuffers(1, &m_indexBuffer);
6038             gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6039             gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t)),
6040                           &m_indexData[0], indexUsage);
6041         }
6042 
6043         setupVertexAttribs();
6044 
6045         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
6046             gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
6047         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6048             gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
6049         else
6050             DE_ASSERT(false);
6051     }
6052     else
6053         DE_ASSERT(false);
6054 
6055     // misc draw buffer
6056     {
6057         std::vector<tcu::Vec4> vertexData;
6058         LayeredGridSpec scene;
6059 
6060         // create ~1.5MB workload with similar characteristics
6061         scene.gridWidth  = 40;
6062         scene.gridHeight = 40;
6063         scene.gridLayers = 5;
6064 
6065         generateLayeredGridVertexAttribData4C4V(vertexData, scene);
6066 
6067         gl.genBuffers(1, &m_miscBuffer);
6068         gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
6069         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0],
6070                       GL_STATIC_DRAW);
6071 
6072         m_numMiscVertices = getLayeredGridNumVertices(scene);
6073     }
6074 
6075     // iterations
6076     {
6077         m_samples.resize((m_numMaxSwaps + 1) * m_numSamplesPerSwap);
6078         m_results.resize((m_numMaxSwaps + 1) * m_numSamplesPerSwap);
6079 
6080         for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps)
6081             for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx)
6082             {
6083                 const int index = numSwaps * m_numSamplesPerSwap + sampleNdx;
6084 
6085                 m_samples[index].numFrames = numSwaps;
6086             }
6087 
6088         m_iterationOrder.resize(m_samples.size());
6089         generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size());
6090     }
6091 
6092     // log
6093     m_testCtx.getLog()
6094         << tcu::TestLog::Message << "Measuring time used in "
6095         << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n"
6096         << "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, "
6097         << m_numMaxSwaps << "].\n"
6098         << "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index"))
6099         << " buffer.\n"
6100         << "Uploading using "
6101         << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ?
6102                 ("bufferData") :
6103             (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ?
6104                 ("bufferSubData") :
6105             (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ?
6106                 ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | "
6107                  "GL_MAP_UNSYNCHRONIZED_BIT") :
6108                 ((const char *)DE_NULL))
6109         << "\n"
6110         << "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n"
6111         << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : (""))
6112         << "Test result is the number of frames (swaps) required for the render time to stabilize.\n"
6113         << "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n"
6114         << tcu::TestLog::EndMessage;
6115 }
6116 
deinit(void)6117 void UploadWaitDrawCase::deinit(void)
6118 {
6119     RenderPerformanceTestBase::deinit();
6120 
6121     if (m_vertexBuffer)
6122     {
6123         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer);
6124         m_vertexBuffer = 0;
6125     }
6126     if (m_indexBuffer)
6127     {
6128         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer);
6129         m_indexBuffer = 0;
6130     }
6131     if (m_miscBuffer)
6132     {
6133         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer);
6134         m_miscBuffer = 0;
6135     }
6136 }
6137 
iterate(void)6138 UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate(void)
6139 {
6140     const glw::Functions &gl             = m_context.getRenderContext().getFunctions();
6141     const int betweenIterationFrameCount = 5; // draw misc between test samples
6142     const int frameNdx                   = m_frameNdx++;
6143     const int currentSampleNdx           = m_iterationOrder[m_sampleNdx];
6144 
6145     // Simulate work for about 8ms
6146     busyWait(8000);
6147 
6148     // Busywork rendering during unused frames
6149     if (frameNdx != m_samples[currentSampleNdx].numFrames)
6150     {
6151         // draw similar from another buffer
6152         drawMisc();
6153     }
6154 
6155     if (frameNdx == 0)
6156     {
6157         // upload and start the clock
6158         uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
6159     }
6160 
6161     if (frameNdx ==
6162         m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0
6163     {
6164         // draw using the uploaded buffer
6165         drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
6166 
6167         // re-use buffer for something else to make sure test iteration do not affect each other
6168         if (m_bufferState == BUFFERSTATE_NEW)
6169             reuseAndDeleteBuffer();
6170     }
6171     else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationFrameCount)
6172     {
6173         // next sample
6174         ++m_sampleNdx;
6175         m_frameNdx = 0;
6176     }
6177 
6178     GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate");
6179 
6180     if (m_sampleNdx < (int)m_samples.size())
6181         return CONTINUE;
6182 
6183     logAndSetTestResult();
6184     return STOP;
6185 }
6186 
uploadBuffer(Sample & sample,Result & result)6187 void UploadWaitDrawCase::uploadBuffer(Sample &sample, Result &result)
6188 {
6189     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
6190     uint64_t startTime;
6191     uint64_t endTime;
6192     glw::GLenum target;
6193     glw::GLsizeiptr size;
6194     const void *source;
6195 
6196     // data source
6197 
6198     if (m_targetBuffer == TARGETBUFFER_VERTEX)
6199     {
6200         DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
6201 
6202         target = GL_ARRAY_BUFFER;
6203         size   = (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4));
6204         source = &m_vertexData[0];
6205     }
6206     else if (m_targetBuffer == TARGETBUFFER_INDEX)
6207     {
6208         DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
6209 
6210         target = GL_ELEMENT_ARRAY_BUFFER;
6211         size   = (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t));
6212         source = &m_indexData[0];
6213     }
6214     else
6215     {
6216         DE_ASSERT(false);
6217         return;
6218     }
6219 
6220     // gen buffer
6221 
6222     if (m_bufferState == BUFFERSTATE_NEW)
6223     {
6224         if (m_targetBuffer == TARGETBUFFER_VERTEX)
6225         {
6226             gl.genBuffers(1, &m_vertexBuffer);
6227             gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6228         }
6229         else if (m_targetBuffer == TARGETBUFFER_INDEX)
6230         {
6231             gl.genBuffers(1, &m_indexBuffer);
6232             gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6233         }
6234         else
6235             DE_ASSERT(false);
6236 
6237         if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA || m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
6238         {
6239             gl.bufferData(target, size, DE_NULL, GL_STATIC_DRAW);
6240         }
6241     }
6242     else if (m_bufferState == BUFFERSTATE_EXISTING)
6243     {
6244         if (m_targetBuffer == TARGETBUFFER_VERTEX)
6245             gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6246         else if (m_targetBuffer == TARGETBUFFER_INDEX)
6247             gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6248         else
6249             DE_ASSERT(false);
6250     }
6251     else
6252         DE_ASSERT(false);
6253 
6254     // upload
6255 
6256     startTime = deGetMicroseconds();
6257 
6258     if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
6259         gl.bufferData(target, size, source, GL_STATIC_DRAW);
6260     else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
6261         gl.bufferSubData(target, 0, size, source);
6262     else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
6263     {
6264         void *mapPtr;
6265         glw::GLboolean unmapSuccessful;
6266 
6267         mapPtr = gl.mapBufferRange(target, 0, size,
6268                                    GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
6269         if (!mapPtr)
6270             throw tcu::Exception("MapBufferRange returned NULL");
6271 
6272         deMemcpy(mapPtr, source, (int)size);
6273 
6274         // if unmapping fails, just try again later
6275         unmapSuccessful = gl.unmapBuffer(target);
6276         if (!unmapSuccessful)
6277             throw UnmapFailureError();
6278     }
6279     else
6280         DE_ASSERT(false);
6281 
6282     endTime = deGetMicroseconds();
6283 
6284     sample.uploadCallEndTime = endTime;
6285     result.uploadDuration    = endTime - startTime;
6286 }
6287 
drawFromBuffer(Sample & sample,Result & result)6288 void UploadWaitDrawCase::drawFromBuffer(Sample &sample, Result &result)
6289 {
6290     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
6291     tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
6292     uint64_t startTime;
6293     uint64_t endTime;
6294 
6295     DE_ASSERT(m_vertexBuffer != 0);
6296     if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
6297         DE_ASSERT(m_indexBuffer == 0);
6298     else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6299         DE_ASSERT(m_indexBuffer != 0);
6300     else
6301         DE_ASSERT(false);
6302 
6303     // draw
6304     {
6305         gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
6306         if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6307             gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
6308 
6309         setupVertexAttribs();
6310 
6311         // microseconds passed since return from upload call
6312         result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime;
6313 
6314         startTime = deGetMicroseconds();
6315 
6316         if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
6317             gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
6318         else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
6319             gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
6320         else
6321             DE_ASSERT(false);
6322 
6323         endTime = deGetMicroseconds();
6324 
6325         result.renderDuration = endTime - startTime;
6326     }
6327 
6328     // read
6329     {
6330         startTime = deGetMicroseconds();
6331         glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
6332         endTime = deGetMicroseconds();
6333 
6334         result.readDuration = endTime - startTime;
6335     }
6336 
6337     result.renderReadDuration = result.renderDuration + result.readDuration;
6338 }
6339 
reuseAndDeleteBuffer(void)6340 void UploadWaitDrawCase::reuseAndDeleteBuffer(void)
6341 {
6342     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
6343 
6344     if (m_targetBuffer == TARGETBUFFER_INDEX)
6345     {
6346         // respecify and delete index buffer
6347         static const uint32_t indices[3] = {1, 3, 8};
6348 
6349         DE_ASSERT(m_indexBuffer != 0);
6350 
6351         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
6352         gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL);
6353         gl.deleteBuffers(1, &m_indexBuffer);
6354         m_indexBuffer = 0;
6355     }
6356     else if (m_targetBuffer == TARGETBUFFER_VERTEX)
6357     {
6358         // respecify and delete vertex buffer
6359         static const tcu::Vec4 coloredTriangle[6] = {
6360             tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f),  tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f), tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f),
6361             tcu::Vec4(-0.2f, 0.4f, 0.0f, 1.0f), tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f),   tcu::Vec4(0.8f, -0.1f, 0.0f, 1.0f),
6362         };
6363 
6364         DE_ASSERT(m_vertexBuffer != 0);
6365 
6366         gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW);
6367         gl.drawArrays(GL_TRIANGLES, 0, 3);
6368         gl.deleteBuffers(1, &m_vertexBuffer);
6369         m_vertexBuffer = 0;
6370     }
6371 
6372     waitGLResults();
6373 }
6374 
logAndSetTestResult(void)6375 void UploadWaitDrawCase::logAndSetTestResult(void)
6376 {
6377     int uploadStabilization;
6378     int renderReadStabilization;
6379     int renderStabilization;
6380     int readStabilization;
6381     bool temporallyStable;
6382 
6383     {
6384         const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples");
6385         logSamples();
6386     }
6387 
6388     {
6389         const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability");
6390 
6391         // log stabilization points
6392         renderReadStabilization = findStabilizationSample(&Result::renderReadDuration, "Combined draw and read");
6393         uploadStabilization     = findStabilizationSample(&Result::uploadDuration, "Upload time");
6394         renderStabilization     = findStabilizationSample(&Result::renderDuration, "Draw call time");
6395         readStabilization       = findStabilizationSample(&Result::readDuration, "ReadPixels time");
6396 
6397         temporallyStable = true;
6398         temporallyStable &= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read");
6399         temporallyStable &= checkSampleTemporalStability(&Result::uploadDuration, "Upload time");
6400         temporallyStable &= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
6401         temporallyStable &= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
6402     }
6403 
6404     {
6405         const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");
6406 
6407         // Check result sanily
6408         if (uploadStabilization != 0)
6409             m_testCtx.getLog() << tcu::TestLog::Message
6410                                << "Warning! Upload times are not stable, test result may not be accurate."
6411                                << tcu::TestLog::EndMessage;
6412         if (!temporallyStable)
6413             m_testCtx.getLog() << tcu::TestLog::Message
6414                                << "Warning! Time samples do not seem to be temporally stable, sample times seem to "
6415                                   "drift to one direction during test execution."
6416                                << tcu::TestLog::EndMessage;
6417 
6418         // render & read
6419         if (renderReadStabilization == -1)
6420             m_testCtx.getLog() << tcu::TestLog::Message
6421                                << "Combined time used in draw call and ReadPixels did not stabilize."
6422                                << tcu::TestLog::EndMessage;
6423         else
6424             m_testCtx.getLog() << tcu::TestLog::Integer(
6425                 "RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time",
6426                 "frames", QP_KEY_TAG_TIME, renderReadStabilization);
6427 
6428         // draw call
6429         if (renderStabilization == -1)
6430             m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize."
6431                                << tcu::TestLog::EndMessage;
6432         else
6433             m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint",
6434                                                         "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME,
6435                                                         renderStabilization);
6436 
6437         // readpixels
6438         if (readStabilization == -1)
6439             m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize."
6440                                << tcu::TestLog::EndMessage;
6441         else
6442             m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint",
6443                                                         "ReadPixels call time stabilization time", "frames",
6444                                                         QP_KEY_TAG_TIME, readStabilization);
6445 
6446         // Report renderReadStabilization
6447         if (renderReadStabilization != -1)
6448             m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str());
6449         else
6450             m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1
6451     }
6452 }
6453 
logSamples(void)6454 void UploadWaitDrawCase::logSamples(void)
6455 {
6456     // Inverse m_iterationOrder
6457 
6458     std::vector<int> runOrder(m_iterationOrder.size());
6459     for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
6460         runOrder[m_iterationOrder[ndx]] = ndx;
6461 
6462     // Log samples
6463 
6464     m_testCtx.getLog() << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
6465                        << tcu::TestLog::ValueInfo("NumSwaps", "SwapBuffers before use", "",
6466                                                   QP_SAMPLE_VALUE_TAG_PREDICTOR)
6467                        << tcu::TestLog::ValueInfo("Delay", "Time before use", "us", QP_SAMPLE_VALUE_TAG_PREDICTOR)
6468                        << tcu::TestLog::ValueInfo("RunOrder", "Sample run order", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
6469                        << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us",
6470                                                   QP_SAMPLE_VALUE_TAG_RESPONSE)
6471                        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
6472                        << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
6473                        << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
6474                        << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
6475                        << tcu::TestLog::EndSampleInfo;
6476 
6477     for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
6478         m_testCtx.getLog() << tcu::TestLog::Sample << m_samples[sampleNdx].numFrames
6479                            << (int)m_results[sampleNdx].timeBeforeUse << runOrder[sampleNdx]
6480                            << (int)m_results[sampleNdx].renderReadDuration
6481                            << (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration)
6482                            << (int)m_results[sampleNdx].uploadDuration << (int)m_results[sampleNdx].renderDuration
6483                            << (int)m_results[sampleNdx].readDuration << tcu::TestLog::EndSample;
6484 
6485     m_testCtx.getLog() << tcu::TestLog::EndSampleList;
6486 }
6487 
drawMisc(void)6488 void UploadWaitDrawCase::drawMisc(void)
6489 {
6490     const glw::Functions &gl = m_context.getRenderContext().getFunctions();
6491 
6492     gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
6493     setupVertexAttribs();
6494     gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices);
6495 }
6496 
// Outcome of comparing two sample distributions with distributionCompare().
struct DistributionCompareResult
{
    bool equal;               // true if the distributions were accepted as equal
    float standardDeviations; // z-score of the test statistic
};
6502 
// Returns the sum of the ranks that the values of testSamples have within
// allSamples. Ranks are 1-based; a value that is equivalent (according to
// comparer) to a run of several entries gets the midpoint of that run's
// first and last rank.
//
// allSamples must be ordered so that std::lower_bound / std::upper_bound
// with comparer are usable on it.
template <typename Comparer>
static float sumOfRanks(const std::vector<uint64_t> &testSamples, const std::vector<uint64_t> &allSamples,
                        const Comparer &comparer)
{
    typedef std::vector<uint64_t>::const_iterator SampleIter;

    float rankSum = 0;

    for (const uint64_t value : testSamples)
    {
        // [firstEquivalent, pastEquivalent) is the run of entries equivalent to value.
        const SampleIter firstEquivalent = std::lower_bound(allSamples.begin(), allSamples.end(), value, comparer);
        const SampleIter pastEquivalent  = std::upper_bound(allSamples.begin(), allSamples.end(), value, comparer);

        // Zero-based index -> rank. The one-past-the-end index of the run
        // already equals the rank of its last element.
        const int lowestRank  = (int)(firstEquivalent - allSamples.begin()) + 1;
        const int highestRank = (int)(pastEquivalent - allSamples.begin());

        rankSum += (float)(lowestRank + highestRank) / 2.0f;
    }

    return rankSum;
}
6525 
6526 template <typename Comparer>
distributionCompare(const std::vector<uint64_t> & orderedObservationsA,const std::vector<uint64_t> & orderedObservationsB,const Comparer & comparer)6527 static DistributionCompareResult distributionCompare(const std::vector<uint64_t> &orderedObservationsA,
6528                                                      const std::vector<uint64_t> &orderedObservationsB,
6529                                                      const Comparer &comparer)
6530 {
6531     // Mann-Whitney U test
6532 
6533     const int n1 = (int)orderedObservationsA.size();
6534     const int n2 = (int)orderedObservationsB.size();
6535     std::vector<uint64_t> allSamples(n1 + n2);
6536 
6537     std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin());
6538     std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1);
6539     std::sort(allSamples.begin(), allSamples.end());
6540 
6541     {
6542         const float R1 = sumOfRanks(orderedObservationsA, allSamples, comparer);
6543 
6544         const float U1 = (float)(n1 * n2 + n1 * (n1 + 1) / 2) - R1;
6545         const float U2 = (float)(n1 * n2) - U1;
6546         const float U  = de::min(U1, U2);
6547 
6548         // \note: sample sizes might not be large enough to expect normal distribution but we do it anyway
6549 
6550         const float mU     = (float)(n1 * n2) / 2.0f;
6551         const float sigmaU = deFloatSqrt((float)(n1 * n2 * (n1 + n2 + 1)) / 12.0f);
6552         const float z      = (U - mU) / sigmaU;
6553 
6554         DistributionCompareResult result;
6555 
6556         result.equal              = (de::abs(z) <= 1.96f); // accept within 95% confidence interval
6557         result.standardDeviations = z;
6558 
6559         return result;
6560     }
6561 }
6562 
6563 template <typename T>
6564 struct ThresholdComparer
6565 {
6566     float relativeThreshold;
6567     T absoluteThreshold;
6568 
operator ()deqp::gles3::Performance::__anone1143f0e0111::ThresholdComparer6569     bool operator()(const T &a, const T &b) const
6570     {
6571         const float diff = de::abs((float)a - (float)b);
6572 
6573         // thresholds
6574         if (diff <= (float)absoluteThreshold)
6575             return false;
6576         if (diff <= float(a) * relativeThreshold || diff <= float(b) * relativeThreshold)
6577             return false;
6578 
6579         // cmp
6580         return a < b;
6581     }
6582 };
6583 
findStabilizationSample(uint64_t UploadWaitDrawCase::Result::* target,const char * description)6584 int UploadWaitDrawCase::findStabilizationSample(uint64_t UploadWaitDrawCase::Result::*target, const char *description)
6585 {
6586     std::vector<std::vector<uint64_t>> sampleObservations(m_numMaxSwaps + 1);
6587     ThresholdComparer<uint64_t> comparer;
6588 
6589     comparer.relativeThreshold = 0.15f; // 15%
6590     comparer.absoluteThreshold = 100;   // (us), assumed sampling precision
6591 
6592     // get observations and order them
6593 
6594     for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx)
6595     {
6596         int insertNdx = 0;
6597 
6598         sampleObservations[swapNdx].resize(m_numSamplesPerSwap);
6599 
6600         for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx)
6601             if (m_samples[ndx].numFrames == swapNdx)
6602                 sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target;
6603 
6604         DE_ASSERT(insertNdx == m_numSamplesPerSwap);
6605 
6606         std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end());
6607     }
6608 
6609     // find stabilization point
6610 
6611     for (int sampleNdx = m_numMaxSwaps - 1; sampleNdx != -1; --sampleNdx)
6612     {
6613         // Distribution is equal to all following distributions
6614         for (int cmpTargetDistribution = sampleNdx + 1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution)
6615         {
6616             // Stable section ends here?
6617             const DistributionCompareResult result =
6618                 distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer);
6619             if (!result.equal)
6620             {
6621                 // Last two samples are not equal? Samples never stabilized
6622                 if (sampleNdx == m_numMaxSwaps - 1)
6623                 {
6624                     m_testCtx.getLog() << tcu::TestLog::Message << description << ": Samples with swap count "
6625                                        << sampleNdx << " and " << cmpTargetDistribution
6626                                        << " do not seem to have the same distribution:\n"
6627                                        << "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6628                                        << "\tSwap count " << sampleNdx
6629                                        << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6630                                        << "\tSwap count " << cmpTargetDistribution
6631                                        << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f)
6632                                        << "\n"
6633                                        << tcu::TestLog::EndMessage;
6634                     return -1;
6635                 }
6636                 else
6637                 {
6638                     m_testCtx.getLog() << tcu::TestLog::Message << description << ": Samples with swap count "
6639                                        << sampleNdx << " and " << cmpTargetDistribution
6640                                        << " do not seem to have the same distribution:\n"
6641                                        << "\tSamples with swap count " << sampleNdx
6642                                        << " are not part of the tail of stable results.\n"
6643                                        << "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6644                                        << "\tSwap count " << sampleNdx
6645                                        << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6646                                        << "\tSwap count " << cmpTargetDistribution
6647                                        << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f)
6648                                        << "\n"
6649                                        << tcu::TestLog::EndMessage;
6650 
6651                     return sampleNdx + 1;
6652                 }
6653             }
6654         }
6655     }
6656 
6657     m_testCtx.getLog() << tcu::TestLog::Message << description << ": All samples seem to have the same distribution"
6658                        << tcu::TestLog::EndMessage;
6659 
6660     // all distributions equal
6661     return 0;
6662 }
6663 
checkSampleTemporalStability(uint64_t UploadWaitDrawCase::Result::* target,const char * description)6664 bool UploadWaitDrawCase::checkSampleTemporalStability(uint64_t UploadWaitDrawCase::Result::*target,
6665                                                       const char *description)
6666 {
6667     // Try to find correlation with sample order and sample times
6668 
6669     const int numDataPoints = (int)m_iterationOrder.size();
6670     std::vector<tcu::Vec2> dataPoints(m_iterationOrder.size());
6671     LineParametersWithConfidence lineFit;
6672 
6673     for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
6674     {
6675         dataPoints[m_iterationOrder[ndx]].x() = (float)ndx;
6676         dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target);
6677     }
6678 
6679     lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f);
6680 
6681     // Difference of more than 25% of the offset along the whole sample range
6682     if (de::abs(lineFit.coefficient) * (float)numDataPoints > de::abs(lineFit.offset) * 0.25f)
6683     {
6684         m_testCtx.getLog() << tcu::TestLog::Message << description
6685                            << ": Correlation with data point observation order and result time. Results are not "
6686                               "temporally stable, observations are not independent.\n"
6687                            << "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n"
6688                            << tcu::TestLog::EndMessage;
6689 
6690         return false;
6691     }
6692     else
6693         return true;
6694 }
6695 
6696 } // namespace
6697 
BufferDataUploadTests(Context & context)6698 BufferDataUploadTests::BufferDataUploadTests(Context &context)
6699     : TestCaseGroup(context, "data_upload", "Buffer data upload performance tests")
6700 {
6701 }
6702 
~BufferDataUploadTests(void)6703 BufferDataUploadTests::~BufferDataUploadTests(void)
6704 {
6705 }
6706 
init(void)6707 void BufferDataUploadTests::init(void)
6708 {
6709     static const struct BufferUsage
6710     {
6711         const char *name;
6712         uint32_t usage;
6713         bool primaryUsage;
6714     } bufferUsages[] = {
6715         {"stream_draw", GL_STREAM_DRAW, true},    {"stream_read", GL_STREAM_READ, false},
6716         {"stream_copy", GL_STREAM_COPY, false},   {"static_draw", GL_STATIC_DRAW, true},
6717         {"static_read", GL_STATIC_READ, false},   {"static_copy", GL_STATIC_COPY, false},
6718         {"dynamic_draw", GL_DYNAMIC_DRAW, true},  {"dynamic_read", GL_DYNAMIC_READ, false},
6719         {"dynamic_copy", GL_DYNAMIC_COPY, false},
6720     };
6721 
6722     tcu::TestCaseGroup *const referenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Reference functions");
6723     tcu::TestCaseGroup *const functionCallGroup =
6724         new tcu::TestCaseGroup(m_testCtx, "function_call", "Function call timing");
6725     tcu::TestCaseGroup *const modifyAfterUseGroup =
6726         new tcu::TestCaseGroup(m_testCtx, "modify_after_use", "Function call time after buffer has been used");
6727     tcu::TestCaseGroup *const renderAfterUploadGroup = new tcu::TestCaseGroup(
6728         m_testCtx, "render_after_upload", "Function call time of draw commands after buffer has been modified");
6729 
6730     addChild(referenceGroup);
6731     addChild(functionCallGroup);
6732     addChild(modifyAfterUseGroup);
6733     addChild(renderAfterUploadGroup);
6734 
6735     // .reference
6736     {
6737         static const struct BufferSizeRange
6738         {
6739             const char *name;
6740             int minBufferSize;
6741             int maxBufferSize;
6742             int numSamples;
6743             bool largeBuffersCase;
6744         } sizeRanges[] = {
6745             {"small_buffers", 0, 1 << 18, 64, false},      // !< 0kB - 256kB
6746             {"large_buffers", 1 << 18, 1 << 24, 32, true}, // !< 256kB - 16MB
6747         };
6748 
6749         for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx)
6750         {
6751             referenceGroup->addChild(new ReferenceMemcpyCase(
6752                 m_context, std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(),
6753                 "Test memcpy performance", sizeRanges[bufferSizeRangeNdx].minBufferSize,
6754                 sizeRanges[bufferSizeRangeNdx].maxBufferSize, sizeRanges[bufferSizeRangeNdx].numSamples,
6755                 sizeRanges[bufferSizeRangeNdx].largeBuffersCase));
6756         }
6757     }
6758 
6759     // .function_call
6760     {
6761         const int minBufferSize  = 0;       // !< 0kiB
6762         const int maxBufferSize  = 1 << 24; // !< 16MiB
6763         const int numDataSamples = 25;
6764         const int numMapSamples  = 25;
6765 
6766         tcu::TestCaseGroup *const bufferDataMethodGroup =
6767             new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData");
6768         tcu::TestCaseGroup *const bufferSubDataMethodGroup =
6769             new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData");
6770         tcu::TestCaseGroup *const mapBufferRangeMethodGroup =
6771             new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange");
6772 
6773         functionCallGroup->addChild(bufferDataMethodGroup);
6774         functionCallGroup->addChild(bufferSubDataMethodGroup);
6775         functionCallGroup->addChild(mapBufferRangeMethodGroup);
6776 
6777         // .buffer_data
6778         {
6779             static const struct TargetCase
6780             {
6781                 tcu::TestCaseGroup *group;
6782                 BufferDataUploadCase::CaseType caseType;
6783                 bool allUsages;
6784             } targetCases[] = {
6785                 {new tcu::TestCaseGroup(m_testCtx, "new_buffer", "Target new buffer"),
6786                  BufferDataUploadCase::CASE_NEW_BUFFER, true},
6787                 {new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer", "Target new unspecified buffer"),
6788                  BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER, true},
6789                 {new tcu::TestCaseGroup(m_testCtx, "specified_buffer", "Target new specified buffer"),
6790                  BufferDataUploadCase::CASE_SPECIFIED_BUFFER, true},
6791                 {new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Target buffer that was used in draw"),
6792                  BufferDataUploadCase::CASE_USED_BUFFER, true},
6793                 {new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer", "Target larger buffer that was used in draw"),
6794                  BufferDataUploadCase::CASE_USED_LARGER_BUFFER, false},
6795             };
6796 
6797             for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx)
6798             {
6799                 bufferDataMethodGroup->addChild(targetCases[targetNdx].group);
6800 
6801                 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6802                     if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages)
6803                         targetCases[targetNdx].group->addChild(new BufferDataUploadCase(
6804                             m_context, std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6805                             std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6806                             minBufferSize, maxBufferSize, numDataSamples, bufferUsages[usageNdx].usage,
6807                             targetCases[targetNdx].caseType));
6808             }
6809         }
6810 
6811         // .buffer_sub_data
             // Adds one child group per FlagCase entry to bufferSubDataMethodGroup and fills each group with a
             // "usage_<name>" BufferSubDataUploadCase for every selected entry of bufferUsages.
6812         {
                 // The table is intentionally not 'static': its initializers allocate groups using this object's
                 // m_testCtx, so a function-local static would be filled only on the first call and every later
                 // call would pass the very same group pointers to addChild() again.
                 // NOTE(review): assumes addChild() hands ownership of the group to the parent - confirm.
6813             const struct FlagCase
6814             {
6815                 tcu::TestCaseGroup *group; // group node created for this configuration
6816                 BufferSubDataUploadCase::CaseType parentCase; // case type forwarded to each case in the group
6817                 bool allUsages; // false: only bufferUsages entries with primaryUsage set are instantiated
6818                 int flags; // BufferSubDataUploadCase::FLAG_* bits forwarded to each case in the group
6819             } flagCases[] = {
6820                 {new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload", ""),
6821                  BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_FULL_UPLOAD},
6822                 {new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload",
6823                                         "Clear buffer with bufferData(...,NULL) before sub data call"),
6824                  BufferSubDataUploadCase::CASE_USED_BUFFER, false,
6825                  BufferSubDataUploadCase::FLAG_FULL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE},
6826                 {new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload", ""),
6827                  BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD},
6828                 {new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload",
6829                                         "Clear buffer with bufferData(...,NULL) before sub data call"),
6830                  BufferSubDataUploadCase::CASE_USED_BUFFER, false,
6831                  BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE},
6832             };
6833 
6834             for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx)
6835             {
6836                 bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group);
6837 
                     // Primary usages are always covered; the remaining usages only when the entry asks for all.
6838                 for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6839                     if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages)
6840                         flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(
6841                             m_context, std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6842                             std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6843                             minBufferSize, maxBufferSize, numDataSamples, bufferUsages[usageNdx].usage,
6844                             flagCases[flagNdx].parentCase, flagCases[flagNdx].flags));
6845             }
6846         }
6847 
6848         // .map_buffer_range
6849         {
6850             static const struct FlagCase
6851             {
6852                 const char *name;
6853                 bool usefulForUnusedBuffers;
6854                 bool allUsages;
6855                 int glFlags;
6856                 int caseFlags;
6857             } flagCases[] = {
6858                 {"flag_write_full", true, true, GL_MAP_WRITE_BIT, 0},
6859                 {"flag_write_partial", true, true, GL_MAP_WRITE_BIT, MapBufferRangeCase::FLAG_PARTIAL},
6860                 {"flag_read_write_full", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT, 0},
6861                 {"flag_read_write_partial", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,
6862                  MapBufferRangeCase::FLAG_PARTIAL},
6863                 {"flag_invalidate_range_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, 0},
6864                 {"flag_invalidate_range_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
6865                  MapBufferRangeCase::FLAG_PARTIAL},
6866                 {"flag_invalidate_buffer_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0},
6867                 {"flag_invalidate_buffer_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,
6868                  MapBufferRangeCase::FLAG_PARTIAL},
6869                 {"flag_write_full_manual_invalidate_buffer", false, false,
6870                  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, MapBufferRangeCase::FLAG_MANUAL_INVALIDATION},
6871                 {"flag_write_partial_manual_invalidate_buffer", false, false,
6872                  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
6873                  MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION},
6874                 {"flag_unsynchronized_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT, 0},
6875                 {"flag_unsynchronized_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,
6876                  MapBufferRangeCase::FLAG_PARTIAL},
6877                 {"flag_unsynchronized_and_invalidate_buffer_full", true, false,
6878                  GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0},
6879                 {"flag_unsynchronized_and_invalidate_buffer_partial", true, false,
6880                  GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,
6881                  MapBufferRangeCase::FLAG_PARTIAL},
6882             };
6883             static const struct FlushCases
6884             {
6885                 const char *name;
6886                 int glFlags;
6887                 int caseFlags;
6888             } flushCases[] = {
6889                 {"flag_flush_explicit_map_full", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, 0},
6890                 {"flag_flush_explicit_map_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,
6891                  MapBufferRangeFlushCase::FLAG_PARTIAL},
6892                 {"flag_flush_explicit_map_full_flush_in_parts", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,
6893                  MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS},
6894                 {"flag_flush_explicit_map_full_flush_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,
6895                  MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL},
6896             };
6897             static const struct MapTestGroup
6898             {
6899                 int flags;
6900                 bool unusedBufferCase;
6901                 tcu::TestCaseGroup *group;
6902             } groups[] = {
6903                 {
6904                     MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER,
6905                     true,
6906                     new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer",
6907                                            "Test with unused, unspecified buffers"),
6908                 },
6909                 {
6910                     MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER,
6911                     true,
6912                     new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"),
6913                 },
6914                 {0, false,
6915                  new tcu::TestCaseGroup(m_testCtx, "used_buffer",
6916                                         "Test with used (data has been sourced from a buffer) buffers")},
6917             };
6918 
6919             // we OR same flags to both range and flushRange cases, make sure it is legal
6920             DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER ==
6921                              (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER);
6922             DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER ==
6923                              (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER);
6924 
6925             for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx)
6926             {
6927                 tcu::TestCaseGroup *const bufferTypeGroup = groups[groupNdx].group;
6928 
6929                 mapBufferRangeMethodGroup->addChild(bufferTypeGroup);
6930 
6931                 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx)
6932                 {
6933                     if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers)
6934                         continue;
6935 
6936                     tcu::TestCaseGroup *const bufferUsageGroup =
6937                         new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, "");
6938                     bufferTypeGroup->addChild(bufferUsageGroup);
6939 
6940                     for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6941                         if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages)
6942                             bufferUsageGroup->addChild(new MapBufferRangeCase(
6943                                 m_context, bufferUsages[usageNdx].name,
6944                                 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6945                                 minBufferSize, maxBufferSize, numMapSamples, bufferUsages[usageNdx].usage,
6946                                 flagCases[caseNdx].glFlags, flagCases[caseNdx].caseFlags | groups[groupNdx].flags));
6947                 }
6948 
6949                 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx)
6950                 {
6951                     tcu::TestCaseGroup *const bufferUsageGroup =
6952                         new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, "");
6953                     bufferTypeGroup->addChild(bufferUsageGroup);
6954 
6955                     for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6956                         if (bufferUsages[usageNdx].primaryUsage)
6957                             bufferUsageGroup->addChild(new MapBufferRangeFlushCase(
6958                                 m_context, bufferUsages[usageNdx].name,
6959                                 std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6960                                 minBufferSize, maxBufferSize, numMapSamples, bufferUsages[usageNdx].usage,
6961                                 flushCases[caseNdx].glFlags, flushCases[caseNdx].caseFlags | groups[groupNdx].flags));
6962                 }
6963             }
6964         }
6965     }
6966 
6967     // .modify_after_use
6968     {
6969         const int minBufferSize = 0;       // !< 0kiB
6970         const int maxBufferSize = 1 << 24; // !< 16MiB
6971 
6972         static const struct Usage
6973         {
6974             const char *name;
6975             const char *description;
6976             uint32_t usage;
6977         } usages[] = {
6978             {"static_draw", "Test with GL_STATIC_DRAW", GL_STATIC_DRAW},
6979             {"dynamic_draw", "Test with GL_DYNAMIC_DRAW", GL_DYNAMIC_DRAW},
6980             {"stream_draw", "Test with GL_STREAM_DRAW", GL_STREAM_DRAW},
6981 
6982         };
6983 
6984         for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx)
6985         {
6986             tcu::TestCaseGroup *const usageGroup =
6987                 new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description);
6988             modifyAfterUseGroup->addChild(usageGroup);
6989 
6990             usageGroup->addChild(new ModifyAfterWithBufferDataCase(m_context, "buffer_data",
6991                                                                    "Respecify buffer contents after use", minBufferSize,
6992                                                                    maxBufferSize, usages[usageNdx].usage, 0));
6993             usageGroup->addChild(new ModifyAfterWithBufferDataCase(
6994                 m_context, "buffer_data_different_size", "Respecify buffer contents and size after use", minBufferSize,
6995                 maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE));
6996             usageGroup->addChild(new ModifyAfterWithBufferDataCase(
6997                 m_context, "buffer_data_repeated", "Respecify buffer contents after upload and use", minBufferSize,
6998                 maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED));
6999 
7000             usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
7001                 m_context, "buffer_sub_data_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize,
7002                 usages[usageNdx].usage, 0));
7003             usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
7004                 m_context, "buffer_sub_data_partial", "Respecify buffer contents partially use", minBufferSize,
7005                 maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
7006             usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
7007                 m_context, "buffer_sub_data_full_repeated", "Respecify buffer contents after upload and use",
7008                 minBufferSize, maxBufferSize, usages[usageNdx].usage,
7009                 ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED));
7010             usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
7011                 m_context, "buffer_sub_data_partial_repeated", "Respecify buffer contents partially upload and use",
7012                 minBufferSize, maxBufferSize, usages[usageNdx].usage,
7013                 ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED |
7014                     ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
7015 
7016             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7017                 m_context, "map_flag_write_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize,
7018                 usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT));
7019             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7020                 m_context, "map_flag_write_partial", "Respecify buffer contents partially after use", minBufferSize,
7021                 maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7022                 GL_MAP_WRITE_BIT));
7023             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7024                 m_context, "map_flag_read_write_full", "Respecify buffer contents after use", minBufferSize,
7025                 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
7026             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7027                 m_context, "map_flag_read_write_partial", "Respecify buffer contents partially after use",
7028                 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7029                 GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
7030             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7031                 m_context, "map_flag_invalidate_range_full", "Respecify buffer contents after use", minBufferSize,
7032                 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
7033             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7034                 m_context, "map_flag_invalidate_range_partial", "Respecify buffer contents partially after use",
7035                 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7036                 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
7037             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7038                 m_context, "map_flag_invalidate_buffer_full", "Respecify buffer contents after use", minBufferSize,
7039                 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
7040             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7041                 m_context, "map_flag_invalidate_buffer_partial", "Respecify buffer contents partially after use",
7042                 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7043                 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
7044             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7045                 m_context, "map_flag_unsynchronized_full", "Respecify buffer contents after use", minBufferSize,
7046                 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
7047             usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
7048                 m_context, "map_flag_unsynchronized_partial", "Respecify buffer contents partially after use",
7049                 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
7050                 GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
7051 
7052             usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase(
7053                 m_context, "map_flag_flush_explicit_full", "Respecify buffer contents after use", minBufferSize,
7054                 maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
7055             usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase(
7056                 m_context, "map_flag_flush_explicit_partial", "Respecify buffer contents partially after use",
7057                 minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL,
7058                 GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
7059         }
7060     }
7061 
7062     // .render_after_upload
7063     {
7064         // .reference
             // Baseline cases under renderAfterUploadGroup: a "draw" group timing readPixels and the two draw
             // methods on their own, and a "draw_upload_draw" group built from the two tables below.
7065         {
7066             tcu::TestCaseGroup *const renderReferenceGroup =
7067                 new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results");
7068             renderAfterUploadGroup->addChild(renderReferenceGroup);
7069 
7070             // .draw
7071             {
7072                 tcu::TestCaseGroup *const drawGroup =
7073                     new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers");
7074                 renderReferenceGroup->addChild(drawGroup);
7075 
7076                 // Time consumed by readPixels
7077                 drawGroup->addChild(new ReferenceReadPixelsTimeCase(
7078                     m_context, "read_pixels", "Measure time consumed by readPixels() function call"));
7079 
7080                 // Time consumed by rendering
7081                 drawGroup->addChild(new ReferenceRenderTimeCase(m_context, "draw_arrays",
7082                                                                 "Measure time consumed by drawArrays() function call",
7083                                                                 DRAWMETHOD_DRAW_ARRAYS));
7084                 drawGroup->addChild(new ReferenceRenderTimeCase(m_context, "draw_elements",
7085                                                                 "Measure time consumed by drawElements() function call",
7086                                                                 DRAWMETHOD_DRAW_ELEMENTS));
7087             }
7088 
7089             // .draw_upload_draw
                 // One case per (upload target, upload method) pair, named "<target>_with_<method>"; pairs that
                 // combine a partial target with a method lacking partial-upload support are left out.
7090             {
7091                 static const struct
7092                 {
7093                     const char *name; // first half of the case name
7094                     const char *description; // used verbatim as the case description
7095                     DrawMethod drawMethod;
7096                     TargetBuffer targetBuffer;
7097                     bool partial; // true: case is created with UPLOADRANGE_PARTIAL, otherwise UPLOADRANGE_FULL
7098                 } uploadTargets[] = {
7099                     {"draw_arrays_upload_vertices",
7100                      "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels "
7101                      "function calls.",
7102                      DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false},
7103                     {"draw_arrays_upload_vertices_partial",
7104                      "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and "
7105                      "readPixels function calls.",
7106                      DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true},
7107                     {"draw_elements_upload_vertices",
7108                      "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and "
7109                      "readPixels function calls.",
7110                      DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false},
7111                     {"draw_elements_upload_indices",
7112                      "Measure time consumed by drawElements, index upload, another drawElements, and readPixels "
7113                      "function calls.",
7114                      DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false},
7115                     {"draw_elements_upload_indices_partial",
7116                      "Measure time consumed by drawElements, partial index upload, another drawElements, and "
7117                      "readPixels function calls.",
7118                      DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true},
7119                 };
7120                 static const struct
7121                 {
7122                     const char *name; // second half of the case name
7123                     const char *description; // not read anywhere in this block
7124                     UploadMethod uploadMethod;
7125                     BufferInUseRenderTimeCase::MapFlags mapFlags;
7126                     bool supportsPartialUpload; // false: method is skipped for targets with partial == true
7127                 } uploadMethods[] = {
7128                     {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE,
7129                      false},
7130                     {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA,
7131                      BufferInUseRenderTimeCase::MAPFLAG_NONE, true},
7132                     {"map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
7133                      BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true},
7134                     {"map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
7135                      BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false},
7136                 };
7137 
7138                 tcu::TestCaseGroup *const drawUploadDrawGroup = new tcu::TestCaseGroup(
7139                     m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw");
7140                 renderReferenceGroup->addChild(drawUploadDrawGroup);
7141 
7142                 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
7143                     for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods);
7144                          ++uploadMethodNdx)
7145                     {
7146                         const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
7147                                                  uploadMethods[uploadMethodNdx].name;
7148 
7149                         if (uploadTargets[uploadTargetNdx].partial &&
7150                             !uploadMethods[uploadMethodNdx].supportsPartialUpload)
7151                             continue;
7152 
                             // Every case in this group is created with UPLOADBUFFERTARGET_DIFFERENT_BUFFER.
7153                         drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(
7154                             m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7155                             uploadTargets[uploadTargetNdx].drawMethod, uploadMethods[uploadMethodNdx].mapFlags,
7156                             uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod,
7157                             (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
7158                             BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER));
7159                     }
7160             }
7161         }
7162 
7163         // .upload_unrelated_and_draw
             // Adds one UnrelatedUploadRenderTimeCase per (draw method, upload method) pair to a new
             // "upload_unrelated_and_draw" group under renderAfterUploadGroup.
7164         {
7165             static const struct
7166             {
7167                 const char *name; // first half of the case name
7168                 const char *description; // function name spliced into the case description
7169                 DrawMethod drawMethod;
7170             } drawMethods[] = {
7171                 {"draw_arrays", "drawArrays", DRAWMETHOD_DRAW_ARRAYS},
7172                 {"draw_elements", "drawElements", DRAWMETHOD_DRAW_ELEMENTS},
7173             };
7174 
7175             static const struct
7176             {
7177                 const char *name; // last part of the case name
7178                 UploadMethod uploadMethod;
7179             } uploadMethods[] = {
7180                 {"buffer_data", UPLOADMETHOD_BUFFER_DATA},
7181                 {"buffer_sub_data", UPLOADMETHOD_BUFFER_SUB_DATA},
7182                 {"map_buffer_range", UPLOADMETHOD_MAP_BUFFER_RANGE},
7183             };
7184 
7185             tcu::TestCaseGroup *const uploadUnrelatedGroup = new tcu::TestCaseGroup(
7186                 m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload");
7187             renderAfterUploadGroup->addChild(uploadUnrelatedGroup);
7188 
7189             for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx)
7190                 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
7191                 {
                         // Case name: "<draw>_upload_unrelated_with_<upload>".
7192                     const std::string name = std::string() + drawMethods[drawMethodNdx].name +
7193                                              "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name;
7194                     const std::string desc = std::string() + "Measure time consumed by " +
7195                                              drawMethods[drawMethodNdx].description +
7196                                              " function call after an unrelated upload";
7197 
7198                     // Time consumed by rendering command after an unrelated upload
7199 
7200                     uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(
7201                         m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod,
7202                         uploadMethods[uploadMethodNdx].uploadMethod));
7203                 }
7204         }
7205 
7206         // .upload_and_draw
             // Adds an "upload_and_draw" group under renderAfterUploadGroup with one sub-group per bufferConfigs
             // entry; each sub-group holds one case per permitted (upload target, upload method) pair.
7207         {
7208             static const struct
7209             {
7210                 const char *name; // name of the per-configuration group
7211                 const char *description; // description of the per-configuration group
7212                 BufferState bufferState;
7213                 UnrelatedBufferType unrelatedBuffer; // also selects the sample type used for the cases below
7214                 bool supportsPartialUpload; // false: targets with partial == true are skipped for this config
7215             } bufferConfigs[] = {
7216                 {"used_buffer", "Upload to an used buffer", BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_NONE, true},
7217                 {"new_buffer", "Upload to a new buffer", BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_NONE, false},
7218                 {"used_buffer_and_unrelated_upload", "Upload to an used buffer and an unrelated buffer and then draw",
7219                  BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_VERTEX, true},
7220                 {"new_buffer_and_unrelated_upload", "Upload to a new buffer and an unrelated buffer and then draw",
7221                  BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_VERTEX, false},
7222             };
7223 
7224             tcu::TestCaseGroup *const uploadAndDrawGroup = new tcu::TestCaseGroup(
7225                 m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers");
7226             renderAfterUploadGroup->addChild(uploadAndDrawGroup);
7227 
7228             // .used_buffer
7229             // .new_buffer
7230             // .used_buffer_and_unrelated_upload
7231             // .new_buffer_and_unrelated_upload
7232             for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx)
7233             {
7234                 static const struct
7235                 {
7236                     const char *name; // first half of the case name
7237                     const char *description; // used verbatim as the case description
7238                     DrawMethod drawMethod;
7239                     TargetBuffer targetBuffer;
7240                     bool partial; // true: case is created with UPLOADRANGE_PARTIAL, otherwise UPLOADRANGE_FULL
7241                 } uploadTargets[] = {
7242                     {"draw_arrays_upload_vertices",
7243                      "Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls",
7244                      DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false},
7245                     {"draw_arrays_upload_vertices_partial",
7246                      "Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function "
7247                      "calls",
7248                      DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true},
7249                     {"draw_elements_upload_vertices",
7250                      "Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls",
7251                      DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false},
7252                     {"draw_elements_upload_indices",
7253                      "Measure time consumed by index upload, drawElements, and readPixels function calls",
7254                      DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false},
7255                     {"draw_elements_upload_indices_partial",
7256                      "Measure time consumed by partial index upload, drawElements, and readPixels function calls",
7257                      DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true},
7258                 };
7259                 static const struct
7260                 {
7261                     const char *name; // second half of the case name
7262                     const char *description; // not read anywhere in this block
7263                     UploadMethod uploadMethod;
7264                     bool supportsPartialUpload; // false: method is skipped for targets with partial == true
7265                 } uploadMethods[] = {
7266                     {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, false},
7267                     {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA, true},
7268                     {"map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, true},
7269                 };
7270 
7271                 tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name,
7272                                                                          bufferConfigs[stateNdx].description);
7273                 uploadAndDrawGroup->addChild(group);
7274 
7275                 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
7276                     for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods);
7277                          ++uploadMethodNdx)
7278                     {
                             // Case name: "<target>_with_<method>".
7279                         const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
7280                                                  uploadMethods[uploadMethodNdx].name;
7281 
                             // A partial upload needs support from both the upload method and the buffer config.
7282                         if (uploadTargets[uploadTargetNdx].partial &&
7283                             !uploadMethods[uploadMethodNdx].supportsPartialUpload)
7284                             continue;
7285                         if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload)
7286                             continue;
7287 
7288                         // Don't log unrelated buffer information to samples if there is no such buffer
7289 
                             // The two branches pass identical constructor arguments; only SampleType differs.
7290                         if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE)
7291                         {
7292                             typedef UploadRenderReadDuration SampleType;
7293                             typedef GenericUploadRenderTimeCase<SampleType> TestType;
7294 
7295                             group->addChild(new TestType(
7296                                 m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7297                                 uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer,
7298                                 uploadMethods[uploadMethodNdx].uploadMethod, bufferConfigs[stateNdx].bufferState,
7299                                 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
7300                                 bufferConfigs[stateNdx].unrelatedBuffer));
7301                         }
7302                         else
7303                         {
7304                             typedef UploadRenderReadDurationWithUnrelatedUploadSize SampleType;
7305                             typedef GenericUploadRenderTimeCase<SampleType> TestType;
7306 
7307                             group->addChild(new TestType(
7308                                 m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7309                                 uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer,
7310                                 uploadMethods[uploadMethodNdx].uploadMethod, bufferConfigs[stateNdx].bufferState,
7311                                 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
7312                                 bufferConfigs[stateNdx].unrelatedBuffer));
7313                         }
7314                     }
7315             }
7316         }
7317 
7318         // .draw_modify_draw
             //
             // Registers the "draw_modify_draw" group under renderAfterUploadGroup (declared outside this
             // excerpt). One BufferInUseRenderTimeCase is added for each (upload target, upload method)
             // pair that passes the partial-upload filter below; the case name is
             // "<target name>_with_<method name>".
7319         {
             // Draw call / target buffer combinations. `partial` selects UPLOADRANGE_PARTIAL instead of
             // UPLOADRANGE_FULL when the case is constructed below.
7320             static const struct
7321             {
7322                 const char *name;
7323                 const char *description;
7324                 DrawMethod drawMethod;
7325                 TargetBuffer targetBuffer;
7326                 bool partial;
7327             } uploadTargets[] = {
7328                 {"draw_arrays_upload_vertices",
7329                  "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels "
7330                  "function calls.",
7331                  DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false},
7332                 {"draw_arrays_upload_vertices_partial",
7333                  "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and "
7334                  "readPixels function calls.",
7335                  DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true},
7336                 {"draw_elements_upload_vertices",
7337                  "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels "
7338                  "function calls.",
7339                  DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false},
7340                 {"draw_elements_upload_indices",
7341                  "Measure time consumed by drawElements, index upload, another drawElements, and readPixels function "
7342                  "calls.",
7343                  DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false},
7344                 {"draw_elements_upload_indices_partial",
7345                  "Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels "
7346                  "function calls.",
7347                  DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true},
7348             };
             // Upload paths to exercise. `mapFlags` is forwarded to the case constructor and
             // `supportsPartialUpload` gates pairing with partial targets. The `description` field of
             // this table is not read anywhere in this block; only the target's description is used.
7349             static const struct
7350             {
7351                 const char *name;
7352                 const char *description;
7353                 UploadMethod uploadMethod;
7354                 BufferInUseRenderTimeCase::MapFlags mapFlags;
7355                 bool supportsPartialUpload;
7356             } uploadMethods[] = {
7357                 {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, false},
7358                 {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA,
7359                  BufferInUseRenderTimeCase::MAPFLAG_NONE, true},
7360                 {"map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
7361                  BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true},
7362                 {"map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
7363                  BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false},
7364             };
7365 
             // NOTE(review): groups and cases are allocated with `new` and handed to addChild();
             // presumably the parent node takes ownership - confirm against tcu::TestNode.
7366             tcu::TestCaseGroup *const drawModifyDrawGroup = new tcu::TestCaseGroup(
7367                 m_testCtx, "draw_modify_draw",
7368                 "Time used in rendering functions with modified buffers while original buffer is still in use");
7369             renderAfterUploadGroup->addChild(drawModifyDrawGroup);
7370 
             // Children are added in table order: targets in the outer loop, methods in the inner loop.
7371             for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
7372                 for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
7373                 {
7374                     const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
7375                                              uploadMethods[uploadMethodNdx].name;
7376 
                     // Partial-range targets are only paired with methods flagged as supporting partial
                     // upload; all other combinations are registered.
7377                     if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
7378                         continue;
7379 
                     // Every case in this group is created with UPLOADBUFFERTARGET_SAME_BUFFER.
7380                     drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(
7381                         m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7382                         uploadTargets[uploadTargetNdx].drawMethod, uploadMethods[uploadMethodNdx].mapFlags,
7383                         uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod,
7384                         (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
7385                         BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER));
7386                 }
7387         }
7388 
7389         // .upload_wait_draw
             //
             // Registers the "upload_wait_draw" group under renderAfterUploadGroup (declared outside this
             // excerpt). It contains one sub-group per entry of bufferStates, and each sub-group receives
             // one UploadWaitDrawCase for every (upload target, upload method) pair, named
             // "<target name>_with_<method name>". No combination is filtered out.
7390         {
             // Buffer states to test; each entry becomes a sub-group named after `name`.
7391             static const struct
7392             {
7393                 const char *name;
7394                 const char *description;
7395                 BufferState bufferState;
7396             } bufferStates[] = {
7397                 {"new_buffer", "Uploading to just generated name", BUFFERSTATE_NEW},
7398                 {"used_buffer", "Uploading to a used buffer", BUFFERSTATE_EXISTING},
7399             };
             // Draw call / target buffer combinations; `description` becomes the case description.
7400             static const struct
7401             {
7402                 const char *name;
7403                 const char *description;
7404                 DrawMethod drawMethod;
7405                 TargetBuffer targetBuffer;
7406             } uploadTargets[] = {
7407                 {"draw_arrays_vertices", "Upload vertex data, draw with drawArrays", DRAWMETHOD_DRAW_ARRAYS,
7408                  TARGETBUFFER_VERTEX},
7409                 {"draw_elements_vertices", "Upload vertex data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS,
7410                  TARGETBUFFER_VERTEX},
7411                 {"draw_elements_indices", "Upload index data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS,
7412                  TARGETBUFFER_INDEX},
7413             };
             // Upload paths to exercise. The `description` field of this table is not read anywhere in
             // this block.
7414             static const struct
7415             {
7416                 const char *name;
7417                 const char *description;
7418                 UploadMethod uploadMethod;
7419             } uploadMethods[] = {
7420                 {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA},
7421                 {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA},
7422                 {"map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE},
7423             };
7424 
             // NOTE(review): the local is called uploadSwapDrawGroup although the group it holds is
             // named "upload_wait_draw"; the variable name looks like a leftover - consider renaming.
             // NOTE(review): groups and cases are allocated with `new` and handed to addChild();
             // presumably the parent node takes ownership - confirm against tcu::TestNode.
7425             tcu::TestCaseGroup *const uploadSwapDrawGroup = new tcu::TestCaseGroup(
7426                 m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago");
7427             renderAfterUploadGroup->addChild(uploadSwapDrawGroup);
7428 
7429             for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx)
7430             {
                 // One sub-group per buffer state, attached before its cases are created.
7431                 tcu::TestCaseGroup *const bufferGroup = new tcu::TestCaseGroup(
7432                     m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description);
7433                 uploadSwapDrawGroup->addChild(bufferGroup);
7434 
                 // Cases are added in table order: targets in the outer loop, methods in the inner loop.
7435                 for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
7436                     for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods);
7437                          ++uploadMethodNdx)
7438                     {
7439                         const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
7440                                                  uploadMethods[uploadMethodNdx].name;
7441 
7442                         bufferGroup->addChild(new UploadWaitDrawCase(
7443                             m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
7444                             uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer,
7445                             uploadMethods[uploadMethodNdx].uploadMethod, bufferStates[bufferStateNdx].bufferState));
7446                     }
7447             }
7448         }
7449     }
7450 }
7451 
7452 } // namespace Performance
7453 } // namespace gles3
7454 } // namespace deqp
7455