From 95646f5cf8c3a995f0fb3c680f2d690b0172f193 Mon Sep 17 00:00:00 2001
From: "meiyou.hr"
Date: Tue, 1 Apr 2025 19:29:41 +0800
Subject: [PATCH] add gp_binary test

---
 Review notes (below the "---" marker, so not part of the commit message):
  * One record per line restored; hunk lengths and diffstat recomputed.
  * "index" lines are kept only for the empty conf file; the other files'
    contents changed, so their old blob hashes would no longer match.
  * get_ecos.sh / run.sh: download and checksum failures now stop the run.
  * loop_inject.sh: absolute binary paths work, ./log is created, and
    monitors that are still running are killed on every exit path.

 conf/functional/gp-test.conf |   0
 tests/gp-test/Readme.md      |  35 ++++++++++
 tests/gp-test/get_ecos.sh    |  38 +++++++++++
 tests/gp-test/gpu_task.py    |  19 ++++++
 tests/gp-test/install.sh     |  21 ++++++
 tests/gp-test/loop_inject.sh | 102 +++++++++++++++++++++++++++++
 tests/gp-test/parse.awk      |  18 +++++
 tests/gp-test/run.sh         |  36 ++++++++++
 8 files changed, 269 insertions(+)
 create mode 100644 conf/functional/gp-test.conf
 create mode 100644 tests/gp-test/Readme.md
 create mode 100644 tests/gp-test/get_ecos.sh
 create mode 100644 tests/gp-test/gpu_task.py
 create mode 100644 tests/gp-test/install.sh
 create mode 100644 tests/gp-test/loop_inject.sh
 create mode 100755 tests/gp-test/parse.awk
 create mode 100644 tests/gp-test/run.sh

diff --git a/conf/functional/gp-test.conf b/conf/functional/gp-test.conf
new file mode 100644
index 0000000..e69de29
diff --git a/tests/gp-test/Readme.md b/tests/gp-test/Readme.md
new file mode 100644
--- /dev/null
+++ b/tests/gp-test/Readme.md
@@ -0,0 +1,35 @@
+# gp-test
+## Description
+
+Functional test for the `ecos-ctl` GPU probe. `run.sh` downloads the latest
+`ecos-ctl` build with `get_ecos.sh`, starts a container that runs
+`gpu_task.py` (a PyTorch matmul loop), and then `loop_inject.sh` attaches the
+probe to the python GPU processes `CYCLE_COUNT` times, checking the files
+written to `./log` after every cycle.
+
+## Homepage
+[http://](http://)
+
+## Version
+
+## Category
+
+functional
+
+## Parameters
+
+- __CYCLE_COUNT__ : number of inject cycles (default 10 in `run.sh`, 1000 in `loop_inject.sh`)
+
+## Results
+
+```
+gp_loop_inject_<CYCLE_COUNT>: Pass
+```
+
+## Manual Run
+```
+step 1: bash get_ecos.sh
+step 2: start a python GPU workload, e.g. python3 gpu_task.py &
+step 3: bash loop_inject.sh
+note:   on success loop_inject.sh kills the python GPU processes it injected into
+```
diff --git a/tests/gp-test/get_ecos.sh b/tests/gp-test/get_ecos.sh
new file mode 100644
--- /dev/null
+++ b/tests/gp-test/get_ecos.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Download the latest ecos-ctl build of a release channel and verify its SHA-1.
+#
+# Usage: get_ecos.sh [channel]      (channel defaults to "release")
+#
+# The downloaded "latest" manifest is read as: first line = release name,
+# last line = expected SHA-1 of ecos-ctl.<release>.
+# Exits non-zero when a download fails or the checksum does not match.
+
+BASE_URL="https://gpu-profiling.oss-cn-hangzhou.aliyuncs.com"
+channel="${1:-release}"
+
+# Drop any stale manifest first; wget would otherwise save the new one as latest.1.
+rm -f latest
+if ! wget "$BASE_URL/$channel/latest"; then
+    echo "failed to download $BASE_URL/$channel/latest"
+    exit 1
+fi
+
+release=$(head -n 1 latest)
+checksum=$(tail -n 1 latest)
+ecos="ecos-ctl.$release"
+
+rm -f "$ecos"
+
+echo "getting $ecos"
+if ! wget "$BASE_URL/$channel/$ecos"; then
+    echo "failed to download $BASE_URL/$channel/$ecos"
+    exit 1
+fi
+
+sha1=$(sha1sum "$ecos" | awk '{print $1}')
+if [[ "$checksum" == "$sha1" ]]; then
+    echo "checked ok."
+else
+    echo "expect $checksum, but got $sha1"
+    exit 1
+fi
diff --git a/tests/gp-test/gpu_task.py b/tests/gp-test/gpu_task.py
new file mode 100644
--- /dev/null
+++ b/tests/gp-test/gpu_task.py
@@ -0,0 +1,19 @@
+"""GPU load generator: the process the gp-test probe is injected into."""
+import time
+
+import torch
+
+
+def matrix_multiplication():
+    """Multiply two random 10000x10000 matrices on the CUDA device."""
+    device = torch.device('cuda')
+    a = torch.randn(10000, 10000, device=device)
+    b = torch.randn(10000, 10000, device=device)
+    c = torch.matmul(a, b)
+    return c
+
+
+if __name__ == "__main__":
+    while True:
+        matrix_multiplication()
+        time.sleep(1)  # simulate a long-running job
diff --git a/tests/gp-test/install.sh b/tests/gp-test/install.sh
new file mode 100644
--- /dev/null
+++ b/tests/gp-test/install.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Available environment:
+#
+# Download variable:
+# WEB_URL=
+# GIT_URL=
+
+
+# fetch()
+
+# Nothing to compile.
+build()
+{
+    :
+}
+
+# Copy the suite directory into the run directory.
+install()
+{
+    cp -Trf "$TONE_BM_SUITE_DIR" "$TONE_BM_RUN_DIR"
+}
diff --git a/tests/gp-test/loop_inject.sh b/tests/gp-test/loop_inject.sh
new file mode 100644
--- /dev/null
+++ b/tests/gp-test/loop_inject.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Repeatedly attach the ecos-ctl GPU probe to the python processes listed by
+# nvidia-smi and compare the json/raw file counts in ./log after every cycle.
+#
+# Usage: loop_inject.sh [ecos-binary]
+#   ecos-binary  binary to test; default ./ecos-ctl.<first line of ./latest>
+# Environment:
+#   CYCLE_COUNT  number of inject cycles (default 1000)
+#
+# Prints "====PASS: gp_loop_inject_<CYCLE_COUNT>" or the matching "====FAIL:"
+# line (consumed by parse.awk) and exits 1 on failure.
+
+pass()
+{
+    echo "====PASS: $*"
+}
+
+fail()
+{
+    echo "====FAIL: $*"
+}
+
+# Kill the monitors started by this shell that are still running. Using the
+# job table instead of saved PIDs avoids hitting an unrelated, recycled PID.
+kill_monitors()
+{
+    local running
+    running=$(jobs -pr)
+    if [ -n "$running" ]; then
+        # Unquoted on purpose: one PID per word.
+        kill -9 $running 2>/dev/null
+    fi
+}
+
+# Defaults to 1000.
+CYCLE_COUNT=${CYCLE_COUNT:-1000}
+
+if [ -n "$1" ]; then
+    if [ ! -f "$1" ]; then
+        echo "$1 not exist!"
+        exit 1
+    fi
+    # An absolute path is used as given; a plain "./$1" would break it.
+    case "$1" in
+        /*) ECOS="$1" ;;
+        *)  ECOS="./$1" ;;
+    esac
+else
+    ECOS="./ecos-ctl.$(head -n 1 latest)"
+    if [ ! -f "$ECOS" ]; then
+        echo "$ECOS not exist!"
+        exit 1
+    fi
+fi
+
+chmod +x "$ECOS"
+echo "using $ECOS"
+
+# The monitor's output is redirected into ./log, so the directory must exist.
+mkdir -p ./log
+pids=""
+
+for i in $(seq 1 "$CYCLE_COUNT"); do
+    echo "Current cycle: $i/$CYCLE_COUNT"
+
+    # Assumes the PID is the 5th whitespace-separated field of nvidia-smi's
+    # process rows; the PIDs are joined with commas for --pid.
+    pids=$(nvidia-smi | awk '/python/ {print $5}' | paste -sd, -)
+    if [ -z "$pids" ]; then
+        echo "error! No python processes found. exiting..."
+        fail "gp_loop_inject_$CYCLE_COUNT"
+        kill_monitors
+        exit 1
+    fi
+
+    rm -f ./log/*
+    rm -f ./log/.cupti_fifo_*.buffer
+    echo "inject to $pids"
+    "$ECOS" --debug monitor gpu probe --output ./log -d 1000 --pid "$pids" --force > ./log/ecoslog 2>&1 &
+
+    sleep 2
+    files=$(ls ./log/ | grep -c json)
+    raws=$(ls ./log/ | grep -c raw)
+
+    if [ "$files" -ne "$raws" ]; then
+        echo "error! File count mismatch. exiting..."
+        fail "gp_loop_inject_$CYCLE_COUNT"
+        kill_monitors
+        exit 1
+    fi
+done
+
+# All cycles passed: end the monitored processes ...
+if [ -n "$pids" ]; then
+    echo "$pids" | tr ',' '\n' | while read -r pid; do
+        kill -9 "$pid"
+    done
+fi
+
+# ... and any ECOS monitor that is still running.
+kill_monitors
+
+pass "gp_loop_inject_$CYCLE_COUNT"
diff --git a/tests/gp-test/parse.awk b/tests/gp-test/parse.awk
new file mode 100755
--- /dev/null
+++ b/tests/gp-test/parse.awk
@@ -0,0 +1,18 @@
+#!/usr/bin/awk -f
+# Convert "====PASS|FAIL|SKIP|WARN: <case>" marker lines into "<case>: <Result>".
+
+/^====PASS:/ {
+    printf("%s: Pass\n",$2)
+}
+
+/^====FAIL:/ {
+    printf("%s: Fail\n",$2)
+}
+
+/^====SKIP:/ {
+    printf("%s: Skip\n",$2)
+}
+
+/^====WARN:/ {
+    printf("%s: Warning\n",$2)
+}
diff --git a/tests/gp-test/run.sh b/tests/gp-test/run.sh
new file mode 100644
--- /dev/null
+++ b/tests/gp-test/run.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Available environment:
+#
+# Download variable:
+# WEB_URL=
+# GIT_URL=
+
+
+# Run the inject loop against a GPU workload in a throw-away container.
+# CYCLE_COUNT may be preset by the caller; it defaults to 10.
+run()
+{
+    export CYCLE_COUNT=${CYCLE_COUNT:-10}
+    mkdir -p log/
+    if ! bash get_ecos.sh; then
+        echo "====FAIL: gp_loop_inject_$CYCLE_COUNT"
+        return 1
+    fi
+
+    # Remove a container left over from an aborted run; "docker run --name"
+    # fails when the name is already in use.
+    docker rm -f gptest_container > /dev/null 2>&1
+    docker run -d --name gptest_container --gpus all gptest_image:latest /bin/bash -c "while true; do sleep 60; done"
+    docker exec -d gptest_container bash -c "python3 /root/gpu_task.py"
+    sleep 5
+    bash loop_inject.sh
+
+    docker rm -f gptest_container
+    upload_archives log/
+}
+
+# Feed stdin through parse.awk to turn ====PASS/====FAIL markers into results.
+parse()
+{
+    "$TONE_BM_SUITE_DIR/parse.awk"
+}
-- 
Gitee