-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'das/dev'
- Loading branch information
Showing
39 changed files
with
16,655 additions
and
262 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# 1. Base image.
FROM ubuntu:22.04

# 2. Use /root as the working directory.
WORKDIR /root

# 3. Copy the prebuilt binary into /usr/local/bin.
COPY dcgm-dcu /usr/local/bin/dcgm-dcu

# 4. Copy the shared-library dependencies into /usr/local/bin/lib.
COPY pkg/dcgm/lib/librocm_smi64.so.2.8 /usr/local/bin/lib/librocm_smi64.so.2.8
COPY pkg/dcgm/lib/libhydmi.so.1.4 /usr/local/bin/lib/libhydmi.so.1.4

# 5. In a single layer: give the libraries and the binary mode 755, then create
#    the versioned (.so.N) and unversioned (.so) symlinks next to the real files.
RUN chmod 755 /usr/local/bin/dcgm-dcu \
        /usr/local/bin/lib/librocm_smi64.so.2.8 \
        /usr/local/bin/lib/libhydmi.so.1.4 \
    && ln -s /usr/local/bin/lib/librocm_smi64.so.2.8 /usr/local/bin/lib/librocm_smi64.so.2 \
    && ln -s /usr/local/bin/lib/librocm_smi64.so.2 /usr/local/bin/lib/librocm_smi64.so \
    && ln -s /usr/local/bin/lib/libhydmi.so.1.4 /usr/local/bin/lib/libhydmi.so.1 \
    && ln -s /usr/local/bin/lib/libhydmi.so.1 /usr/local/bin/lib/libhydmi.so

# 6. Add the library directory to LD_LIBRARY_PATH. The ${VAR:+...} form only
#    emits the "existing value + colon" prefix when the variable is already set;
#    a bare "$LD_LIBRARY_PATH:/dir" would otherwise start with an empty entry,
#    which the dynamic loader interprets as the current working directory.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/bin/lib

# 7. Expose service port 16081.
EXPOSE 16081

# 8. Start the service and redirect its output to a log file. "exec" replaces
#    the wrapper shell with the service process so that signals sent to the
#    container (e.g. on stop) are delivered to the service itself.
CMD ["sh", "-c", "exec /usr/local/bin/dcgm-dcu -logtostderr -v=2 > /usr/local/bin/dcgm.log 2>&1"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
package cli | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
|
||
"github.com/spf13/cobra" | ||
|
||
"g.sugon.com/das/dcgm-dcu/pkg/dcgm" | ||
) | ||
|
||
var pidListCmd = &cobra.Command{ | ||
Use: "pid-list", | ||
Short: "Get a list of PIDs", | ||
Long: `Retrieve a list of process IDs (PIDs) managed by the system.`, | ||
Run: func(cmd *cobra.Command, args []string) { | ||
pidList, err := dcgm.PidList() | ||
if err != nil { | ||
fmt.Println("Error fetching PID list:", err) | ||
os.Exit(1) | ||
} | ||
|
||
fmt.Println("PID List:") | ||
for _, pid := range pidList { | ||
fmt.Println(pid) | ||
} | ||
}, | ||
} | ||
|
||
var showPidsCmd = &cobra.Command{ | ||
Use: "show-pids", | ||
Short: "Show running KFD process information", | ||
Long: `Retrieve and display detailed information about KFD processes currently running on the system.`, | ||
Run: func(cmd *cobra.Command, args []string) { | ||
// 调用 ShowPids 函数 | ||
err := dcgm.ShowPids() | ||
if err != nil { | ||
fmt.Println("Error displaying KFD process information:", err) | ||
os.Exit(1) | ||
} | ||
}, | ||
} | ||
|
||
func init() { | ||
rootCmd.AddCommand(pidListCmd) | ||
rootCmd.AddCommand(showPidsCmd) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
package cli | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
|
||
"github.com/spf13/cobra" | ||
|
||
"g.sugon.com/das/dcgm-dcu/pkg/dcgm" | ||
) | ||
|
||
var dcgmInitialized bool // 追踪 DCGM 是否成功初始化 | ||
|
||
var rootCmd = &cobra.Command{ | ||
Use: "dcgm", | ||
Short: "DCGM CLI tool", | ||
Long: "Command-line interface for managing and interacting with DCGM. Use dcgm-cli [command] --help for more information on a command.", | ||
PersistentPreRunE: func(cmd *cobra.Command, args []string) error { | ||
// 在执行任何命令之前运行初始化 | ||
if err := dcgm.Init(); err != nil { | ||
return fmt.Errorf("initialization failed: %v", err) | ||
} | ||
dcgmInitialized = true // 表示初始化成功 | ||
return nil | ||
}, | ||
} | ||
|
||
// Execute 执行 root 命令 | ||
func Execute() { | ||
defer func() { | ||
// 仅当 DCGM 成功初始化时才调用 ShutDown | ||
if dcgmInitialized { | ||
if err := dcgm.ShutDown(); err != nil { | ||
fmt.Println("Failed to shut down properly:", err) | ||
} | ||
} | ||
}() | ||
|
||
if err := rootCmd.Execute(); err != nil { | ||
fmt.Println(err) | ||
os.Exit(1) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package cli | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
) | ||
|
||
// dataToJson serializes data to JSON using an empty prefix and a one-space
// indent and returns the result as a string. If serialization fails, the
// error is printed and the empty string is returned.
func dataToJson(data any) string {
	var rendered string
	if encoded, err := json.MarshalIndent(data, "", " "); err == nil {
		rendered = string(encoded)
	} else {
		fmt.Println("Error serializing to JSON:", err)
	}
	return rendered
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package cli | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"strconv" | ||
|
||
"github.com/spf13/cobra" | ||
|
||
"g.sugon.com/das/dcgm-dcu/pkg/dcgm" | ||
) | ||
|
||
var vDeviceInfoCmd = &cobra.Command{ | ||
Use: "vdevice-info [device-index]", | ||
Short: "Get virtual device information", | ||
Long: `Retrieve detailed information about a virtual device using its device index.`, | ||
Args: cobra.ExactArgs(1), | ||
Run: func(cmd *cobra.Command, args []string) { | ||
dvInd, err := strconv.Atoi(args[0]) | ||
if err != nil { | ||
fmt.Println("Invalid device index:", err) | ||
os.Exit(1) | ||
} | ||
|
||
info, err := dcgm.VDeviceSingleInfo(dvInd) | ||
if err != nil { | ||
fmt.Println("Error fetching virtual device info:", err) | ||
os.Exit(1) | ||
} | ||
|
||
fmt.Printf("Virtual Device Info: %+v\n", info) | ||
}, | ||
} | ||
|
||
var destroyVDeviceCmd = &cobra.Command{ | ||
Use: "destroy-vdevice<dvInd>", | ||
Short: "Destroy a single virtual device", | ||
Long: `This command destroys a single virtual device by its index.`, | ||
Args: cobra.ExactArgs(1), | ||
Run: func(cmd *cobra.Command, args []string) { | ||
vDvInd, err := strconv.Atoi(args[0]) | ||
if err != nil { | ||
fmt.Println("Invalid virtual device index:", err) | ||
os.Exit(1) | ||
} | ||
|
||
err = dcgm.DestroySingleVDevice(vDvInd) | ||
if err != nil { | ||
fmt.Println("Error destroying virtual device:", err) | ||
os.Exit(1) | ||
} | ||
|
||
fmt.Printf("Virtual device %d destroyed successfully.\n", vDvInd) | ||
}, | ||
} | ||
var allDeviceInfosCmd = &cobra.Command{ | ||
Use: "all-device-infos", | ||
Short: "Get information for all physical devices", | ||
Long: `Retrieve detailed information about all physical devices.`, | ||
Run: func(cmd *cobra.Command, args []string) { | ||
infos, err := dcgm.AllDeviceInfos() | ||
if err != nil { | ||
fmt.Println("Error fetching all device infos:", err) | ||
os.Exit(1) | ||
} | ||
fmt.Println("==========allDevices==========") | ||
fmt.Printf(dataToJson(infos)) | ||
|
||
}, | ||
} | ||
|
||
func init() { | ||
rootCmd.AddCommand(vDeviceInfoCmd) | ||
rootCmd.AddCommand(destroyVDeviceCmd) | ||
rootCmd.AddCommand(allDeviceInfosCmd) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package main | ||
|
||
import "g.sugon.com/das/dcgm-dcu/pkg/cmd/cli" | ||
|
||
func main() { | ||
cli.Execute() // 执行 rootCmd | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.