/* * Copyright © 2015 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #ifndef BRW_PERFORMANCE_QUERY_H #define BRW_PERFORMANCE_QUERY_H #include #include "brw_context.h" #include "brw_performance_query_metrics.h" /* * When currently allocate only one page for pipeline statistics queries. Here * we derived the maximum number of counters for that amount. */ #define STATS_BO_SIZE 4096 #define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2) #define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8) /* * The largest OA formats we can use include: * For Haswell: * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. * For Gen8+ * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters */ #define MAX_OA_REPORT_COUNTERS 62 /** * i965 representation of a performance query object. * * NB: We want to keep this structure relatively lean considering that * applications may expect to allocate enough objects to be able to * query around all draw calls in a frame. */ struct brw_perf_query_object { struct gl_perf_query_object base; const struct brw_perf_query_info *query; /* See query->kind to know which state below is in use... */ union { struct { /** * BO containing OA counter snapshots at query Begin/End time. */ struct brw_bo *bo; /** * Address of mapped of @bo */ void *map; /** * The MI_REPORT_PERF_COUNT command lets us specify a unique * ID that will be reflected in the resulting OA report * that's written by the GPU. This is the ID we're expecting * in the begin report and the the end report should be * @begin_report_id + 1. */ int begin_report_id; /** * Reference the head of the brw->perfquery.sample_buffers * list at the time that the query started (so we only need * to look at nodes after this point when looking for samples * related to this query) * * (See struct brw_oa_sample_buf description for more details) */ struct exec_node *samples_head; /** * Storage for the final accumulated OA counters. */ uint64_t accumulator[MAX_OA_REPORT_COUNTERS]; /** * Hw ID used by the context on which the query was running. */ uint32_t hw_id; /** * false while in the unaccumulated_elements list, and set to * true when the final, end MI_RPC snapshot has been * accumulated. */ bool results_accumulated; /** * Number of reports accumulated to produce the results. */ uint32_t reports_accumulated; /** * Frequency of the GT at begin and end of the query. */ uint64_t gt_frequency[2]; /** * Frequency in the slices of the GT at the begin and end of the * query. */ uint64_t slice_frequency[2]; /** * Frequency in the unslice of the GT at the begin and end of the * query. */ uint64_t unslice_frequency[2]; } oa; struct { /** * BO containing starting and ending snapshots for the * statistics counters. */ struct brw_bo *bo; } pipeline_stats; }; }; static inline struct brw_perf_query_info * brw_perf_query_append_query_info(struct brw_context *brw) { brw->perfquery.queries = reralloc(brw, brw->perfquery.queries, struct brw_perf_query_info, ++brw->perfquery.n_queries); return &brw->perfquery.queries[brw->perfquery.n_queries - 1]; } static inline void brw_perf_query_info_add_stat_reg(struct brw_perf_query_info *query, uint32_t reg, uint32_t numerator, uint32_t denominator, const char *name, const char *description) { struct brw_perf_query_counter *counter; assert(query->n_counters < MAX_STAT_COUNTERS); counter = &query->counters[query->n_counters]; counter->name = name; counter->desc = description; counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL; counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; counter->size = sizeof(uint64_t); counter->offset = sizeof(uint64_t) * query->n_counters; counter->pipeline_stat.reg = reg; counter->pipeline_stat.numerator = numerator; counter->pipeline_stat.denominator = denominator; query->n_counters++; } static inline void brw_perf_query_info_add_basic_stat_reg(struct brw_perf_query_info *query, uint32_t reg, const char *name) { brw_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name); } /* Accumulate 32bits OA counters */ static inline void brw_perf_query_accumulate_uint32(const uint32_t *report0, const uint32_t *report1, uint64_t *accumulator) { *accumulator += (uint32_t)(*report1 - *report0); } /* Accumulate 40bits OA counters */ static inline void brw_perf_query_accumulate_uint40(int a_index, const uint32_t *report0, const uint32_t *report1, uint64_t *accumulator) { const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; uint64_t value0 = report0[a_index + 4] | high0; uint64_t value1 = report1[a_index + 4] | high1; uint64_t delta; if (value0 > value1) delta = (1ULL << 40) + value1 - value0; else delta = value1 - value0; *accumulator += delta; } int brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, struct brw_perf_query_object *obj, size_t data_size, uint8_t *data); void brw_perf_query_register_mdapi_oa_query(struct brw_context *brw); void brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw); #endif /* BRW_PERFORMANCE_QUERY_H */