|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once |
|
|
|
#include "AbstractConfig.h" |
|
#include "ActivityType.h" |
|
|
|
#include <assert.h> |
|
#include <chrono> |
|
#include <functional> |
|
#include <set> |
|
#include <string> |
|
#include <vector> |
|
|
|
namespace libkineto { |
|
|
|
class Config : public AbstractConfig { |
|
public: |
|
Config(); |
|
Config& operator=(const Config&) = delete; |
|
Config(Config&&) = delete; |
|
Config& operator=(Config&&) = delete; |
|
|
|
|
|
std::unique_ptr<Config> clone() const { |
|
auto cfg = std::unique_ptr<Config>(new Config(*this)); |
|
cloneFeaturesInto(*cfg); |
|
return cfg; |
|
} |
|
|
|
bool handleOption(const std::string& name, std::string& val) override; |
|
|
|
void setClientDefaults() override; |
|
|
|
|
|
const std::string& eventLogFile() const { |
|
return eventLogFile_; |
|
} |
|
|
|
bool activityProfilerEnabled() const { |
|
return activityProfilerEnabled_ || |
|
activitiesOnDemandTimestamp_.time_since_epoch().count() > 0; |
|
} |
|
|
|
|
|
const std::string& activitiesLogFile() const { |
|
return activitiesLogFile_; |
|
} |
|
|
|
|
|
const std::string& activitiesLogUrl() const { |
|
return activitiesLogUrl_; |
|
} |
|
|
|
void setActivitiesLogUrl(const std::string& url) { |
|
activitiesLogUrl_ = url; |
|
} |
|
|
|
bool activitiesLogToMemory() const { |
|
return activitiesLogToMemory_; |
|
} |
|
|
|
bool eventProfilerEnabled() const { |
|
return !eventNames_.empty() || !metricNames_.empty(); |
|
} |
|
|
|
|
|
bool eventProfilerEnabledForDevice(uint32_t dev) const { |
|
return 0 != (eventProfilerDeviceMask_ & (1 << dev)); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
std::chrono::milliseconds samplePeriod() const { |
|
return samplePeriod_; |
|
} |
|
|
|
void setSamplePeriod(std::chrono::milliseconds period) { |
|
samplePeriod_ = period; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
std::chrono::milliseconds multiplexPeriod() const { |
|
return multiplexPeriod_; |
|
} |
|
|
|
void setMultiplexPeriod(std::chrono::milliseconds period) { |
|
multiplexPeriod_ = period; |
|
} |
|
|
|
|
|
|
|
std::chrono::milliseconds reportPeriod() const { |
|
return reportPeriod_; |
|
} |
|
|
|
void setReportPeriod(std::chrono::milliseconds msecs); |
|
|
|
|
|
|
|
|
|
int samplesPerReport() const { |
|
return samplesPerReport_; |
|
} |
|
|
|
void setSamplesPerReport(int count) { |
|
samplesPerReport_ = count; |
|
} |
|
|
|
|
|
const std::set<std::string>& eventNames() const { |
|
return eventNames_; |
|
} |
|
|
|
|
|
void addEvents(const std::set<std::string>& names) { |
|
eventNames_.insert(names.begin(), names.end()); |
|
} |
|
|
|
|
|
const std::set<std::string>& metricNames() const { |
|
return metricNames_; |
|
} |
|
|
|
|
|
void addMetrics(const std::set<std::string>& names) { |
|
metricNames_.insert(names.begin(), names.end()); |
|
} |
|
|
|
const std::vector<int>& percentiles() const { |
|
return eventReportPercentiles_; |
|
} |
|
|
|
|
|
std::chrono::seconds eventProfilerOnDemandDuration() const { |
|
return eventProfilerOnDemandDuration_; |
|
} |
|
|
|
void setEventProfilerOnDemandDuration(std::chrono::seconds duration) { |
|
eventProfilerOnDemandDuration_ = duration; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int maxEventProfilersPerGpu() const { |
|
return eventProfilerMaxInstancesPerGpu_; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
std::chrono::seconds eventProfilerHeartbeatMonitorPeriod() const { |
|
return eventProfilerHeartbeatMonitorPeriod_; |
|
} |
|
|
|
|
|
const std::set<ActivityType>& selectedActivityTypes() const { |
|
return selectedActivityTypes_; |
|
} |
|
|
|
|
|
bool perThreadBufferEnabled() const { |
|
return perThreadBufferEnabled_; |
|
} |
|
|
|
void setSelectedActivityTypes(const std::set<ActivityType>& types) { |
|
selectedActivityTypes_ = types; |
|
} |
|
|
|
bool isReportInputShapesEnabled() const { |
|
return enableReportInputShapes_; |
|
} |
|
|
|
bool isProfileMemoryEnabled() const { |
|
return enableProfileMemory_; |
|
} |
|
|
|
bool isWithStackEnabled() const { |
|
return enableWithStack_; |
|
} |
|
|
|
bool isWithFlopsEnabled() const { |
|
return enableWithFlops_; |
|
} |
|
|
|
bool isWithModulesEnabled() const { |
|
return enableWithModules_; |
|
} |
|
|
|
|
|
std::chrono::milliseconds activitiesDuration() const { |
|
return activitiesDuration_; |
|
} |
|
|
|
|
|
int activitiesRunIterations() const { |
|
return activitiesRunIterations_; |
|
} |
|
|
|
int activitiesMaxGpuBufferSize() const { |
|
return activitiesMaxGpuBufferSize_; |
|
} |
|
|
|
std::chrono::seconds activitiesWarmupDuration() const { |
|
return activitiesWarmupDuration_; |
|
} |
|
|
|
int activitiesWarmupIterations() const { |
|
return activitiesWarmupIterations_; |
|
} |
|
|
|
|
|
bool activitiesCudaSyncWaitEvents() const { |
|
return activitiesCudaSyncWaitEvents_; |
|
} |
|
|
|
void setActivitiesCudaSyncWaitEvents(bool enable) { |
|
activitiesCudaSyncWaitEvents_ = enable; |
|
} |
|
|
|
|
|
const std::chrono::time_point<std::chrono::system_clock> requestTimestamp() |
|
const { |
|
if (profileStartTime_.time_since_epoch().count()) { |
|
return profileStartTime_; |
|
} |
|
|
|
if (requestTimestamp_.time_since_epoch().count() == 0) { |
|
return requestTimestamp_; |
|
} |
|
|
|
|
|
return requestTimestamp_ + maxRequestAge() + activitiesWarmupDuration(); |
|
} |
|
|
|
bool hasProfileStartTime() const { |
|
return requestTimestamp_.time_since_epoch().count() > 0 || |
|
profileStartTime_.time_since_epoch().count() > 0; |
|
} |
|
|
|
int profileStartIteration() const { |
|
return profileStartIteration_; |
|
} |
|
|
|
bool hasProfileStartIteration() const { |
|
return profileStartIteration_ >= 0 && activitiesRunIterations_ > 0; |
|
} |
|
|
|
void setProfileStartIteration(int iter) { |
|
profileStartIteration_ = iter; |
|
} |
|
|
|
int profileStartIterationRoundUp() const { |
|
return profileStartIterationRoundUp_; |
|
} |
|
|
|
|
|
int startIterationIncludingWarmup() const { |
|
if (!hasProfileStartIteration()) { |
|
return -1; |
|
} |
|
return profileStartIteration_ - activitiesWarmupIterations_; |
|
} |
|
|
|
const std::chrono::seconds maxRequestAge() const; |
|
|
|
|
|
|
|
|
|
int verboseLogLevel() const { |
|
return verboseLogLevel_; |
|
} |
|
|
|
|
|
|
|
const std::vector<std::string>& verboseLogModules() const { |
|
return verboseLogModules_; |
|
} |
|
|
|
bool sigUsr2Enabled() const { |
|
return enableSigUsr2_; |
|
} |
|
|
|
bool ipcFabricEnabled() const { |
|
return enableIpcFabric_; |
|
} |
|
|
|
std::chrono::seconds onDemandConfigUpdateIntervalSecs() const { |
|
return onDemandConfigUpdateIntervalSecs_; |
|
} |
|
|
|
static std::chrono::milliseconds alignUp( |
|
std::chrono::milliseconds duration, |
|
std::chrono::milliseconds alignment) { |
|
duration += alignment; |
|
return duration - (duration % alignment); |
|
} |
|
|
|
std::chrono::time_point<std::chrono::system_clock> |
|
eventProfilerOnDemandStartTime() const { |
|
return eventProfilerOnDemandTimestamp_; |
|
} |
|
|
|
std::chrono::time_point<std::chrono::system_clock> |
|
eventProfilerOnDemandEndTime() const { |
|
return eventProfilerOnDemandTimestamp_ + eventProfilerOnDemandDuration_; |
|
} |
|
|
|
std::chrono::time_point<std::chrono::system_clock> |
|
activityProfilerRequestReceivedTime() const { |
|
return activitiesOnDemandTimestamp_; |
|
} |
|
|
|
static constexpr std::chrono::milliseconds kControllerIntervalMsecs{1000}; |
|
|
|
|
|
const std::string& requestTraceID() const { |
|
return requestTraceID_; |
|
} |
|
|
|
void setRequestTraceID(const std::string& tid) { |
|
requestTraceID_ = tid; |
|
} |
|
|
|
const std::string& requestGroupTraceID() const { |
|
return requestGroupTraceID_; |
|
} |
|
|
|
void setRequestGroupTraceID(const std::string& gtid) { |
|
requestGroupTraceID_ = gtid; |
|
} |
|
|
|
size_t cuptiDeviceBufferSize() const { |
|
return cuptiDeviceBufferSize_; |
|
} |
|
|
|
size_t cuptiDeviceBufferPoolLimit() const { |
|
return cuptiDeviceBufferPoolLimit_; |
|
} |
|
|
|
void updateActivityProfilerRequestReceivedTime(); |
|
|
|
void printActivityProfilerConfig(std::ostream& s) const override; |
|
void setActivityDependentConfig() override; |
|
|
|
void validate(const std::chrono::time_point<std::chrono::system_clock>& |
|
fallbackProfileStartTime) override; |
|
|
|
static void addConfigFactory( |
|
std::string name, |
|
std::function<AbstractConfig*(Config&)> factory); |
|
|
|
void print(std::ostream& s) const; |
|
|
|
|
|
|
|
|
|
|
|
static std::shared_ptr<void> getStaticObjectsLifetimeHandle(); |
|
|
|
bool getTSCTimestampFlag() const { |
|
return useTSCTimestamp_; |
|
} |
|
|
|
void setTSCTimestampFlag(bool flag) { |
|
useTSCTimestamp_ = flag; |
|
} |
|
|
|
private: |
|
explicit Config(const Config& other) = default; |
|
|
|
AbstractConfig* cloneDerived(AbstractConfig& parent) const override { |
|
|
|
assert(false); |
|
return nullptr; |
|
} |
|
|
|
uint8_t createDeviceMask(const std::string& val); |
|
|
|
|
|
|
|
void setActivityTypes(const std::vector<std::string>& selected_activities); |
|
|
|
|
|
void selectDefaultActivityTypes() { |
|
|
|
for (ActivityType t : defaultActivityTypes()) { |
|
selectedActivityTypes_.insert(t); |
|
} |
|
} |
|
|
|
int verboseLogLevel_; |
|
std::vector<std::string> verboseLogModules_; |
|
|
|
|
|
|
|
std::chrono::milliseconds samplePeriod_; |
|
std::chrono::milliseconds reportPeriod_; |
|
int samplesPerReport_; |
|
std::set<std::string> eventNames_; |
|
std::set<std::string> metricNames_; |
|
|
|
|
|
std::chrono::seconds eventProfilerOnDemandDuration_; |
|
|
|
std::chrono::time_point<std::chrono::system_clock> |
|
eventProfilerOnDemandTimestamp_; |
|
|
|
int eventProfilerMaxInstancesPerGpu_; |
|
|
|
|
|
|
|
std::chrono::seconds eventProfilerHeartbeatMonitorPeriod_; |
|
|
|
|
|
std::string eventLogFile_; |
|
std::vector<int> eventReportPercentiles_ = {5, 25, 50, 75, 95}; |
|
uint8_t eventProfilerDeviceMask_ = ~0; |
|
std::chrono::milliseconds multiplexPeriod_; |
|
|
|
|
|
bool activityProfilerEnabled_; |
|
|
|
|
|
bool perThreadBufferEnabled_; |
|
std::set<ActivityType> selectedActivityTypes_; |
|
|
|
|
|
std::string activitiesLogFile_; |
|
|
|
std::string activitiesLogUrl_; |
|
|
|
|
|
bool activitiesLogToMemory_{false}; |
|
|
|
int activitiesMaxGpuBufferSize_; |
|
std::chrono::seconds activitiesWarmupDuration_; |
|
int activitiesWarmupIterations_; |
|
bool activitiesCudaSyncWaitEvents_; |
|
|
|
|
|
|
|
|
|
bool enableReportInputShapes_{false}; |
|
bool enableProfileMemory_{false}; |
|
bool enableWithStack_{false}; |
|
bool enableWithFlops_{false}; |
|
bool enableWithModules_{false}; |
|
|
|
|
|
std::chrono::milliseconds activitiesDuration_; |
|
int activitiesRunIterations_; |
|
|
|
|
|
|
|
std::string activitiesExternalAPIIterationsTarget_; |
|
|
|
std::vector<std::string> activitiesExternalAPIFilter_; |
|
|
|
int activitiesExternalAPINetSizeThreshold_; |
|
|
|
int activitiesExternalAPIGpuOpCountThreshold_; |
|
|
|
std::chrono::time_point<std::chrono::system_clock> |
|
activitiesOnDemandTimestamp_; |
|
|
|
|
|
|
|
std::chrono::time_point<std::chrono::system_clock> profileStartTime_; |
|
|
|
int profileStartIteration_; |
|
int profileStartIterationRoundUp_; |
|
|
|
|
|
std::chrono::time_point<std::chrono::system_clock> requestTimestamp_; |
|
|
|
|
|
bool enableSigUsr2_; |
|
|
|
|
|
bool enableIpcFabric_; |
|
std::chrono::seconds onDemandConfigUpdateIntervalSecs_; |
|
|
|
|
|
std::string requestTraceID_; |
|
std::string requestGroupTraceID_; |
|
|
|
|
|
size_t cuptiDeviceBufferSize_; |
|
size_t cuptiDeviceBufferPoolLimit_; |
|
|
|
|
|
bool useTSCTimestamp_{true}; |
|
}; |
|
|
|
// Name of the environment variable associated with the daemon setting.
// NOTE(review): presumably this is the variable isDaemonEnvVarSet() looks
// up - confirm against its definition.
constexpr char kUseDaemonEnvVar[]{"KINETO_USE_DAEMON"};

// Declared here, defined elsewhere.
// NOTE(review): presumably reports whether kUseDaemonEnvVar is present in the
// process environment - confirm against the definition.
bool isDaemonEnvVarSet();
|
|
|
} |
|
|