Skip to content
Merged

Dev #11

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
25029e5
fix: CSP error
iwvw Jan 21, 2026
0e0e9d6
feat: Add multi-architecture Docker image CI/CD pipeline, modify Dock…
iwvw Jan 21, 2026
f413690
feat: add API model configuration matrix and OpenAI module.
iwvw Jan 21, 2026
6b03699
build: update Dockerfile to optimize image build process.
iwvw Jan 24, 2026
cc6d034
Merge branch 'main' into dev
iwvw Jan 24, 2026
6e9bfe2
chore: force rebuild docker image
iwvw Jan 24, 2026
fe0dc91
Merge branch 'dev' of https://github.com/iwvw/API-Monitor into dev
iwvw Jan 24, 2026
8e6aa32
chore: force rebuild docker image
iwvw Jan 24, 2026
9997d70
chore: bump version to v0.1.3
iwvw Jan 24, 2026
dd6d4e2
ci: disable docker cache for debugging
iwvw Jan 24, 2026
80be788
fix: add missing modules directory to Docker image
iwvw Jan 24, 2026
e561eec
feat: add server power action interface (reboot/shutdown)
Jan 25, 2026
0050aad
fix(agent): prevent frequent powershell calls when no gpu is detected
Jan 25, 2026
a4744c9
feat(agent): refactor GPU monitoring to use native PDH and NVML APIs
Jan 25, 2026
6322911
fix(ci): optimize docker multi-arch delivery to prevent 400 errors
Jan 25, 2026
560ac4b
fix(agent): fix cross-platform build by moving windows-specific code …
Jan 25, 2026
140c224
fix(agent): add missing sync import in collector_windows.go
Jan 25, 2026
bd7a88c
fix
Jan 25, 2026
03bc1e7
fix: agent build errors, startup notification spam, and email sender …
Jan 25, 2026
5070928
feat: add sender_name input to email channel settings UI
Jan 25, 2026
96a3625
feat: add daily trend sparkline chart to dashboard
Jan 25, 2026
fcaa1ad
feat: optimize chart padding and merge service cards
Jan 25, 2026
20ca8a9
fix: add null check for validation errors to prevent 500 crash
Jan 25, 2026
fa08120
feat: enable viewing TOTP secret in edit mode
Jan 25, 2026
821a5f9
fix(openai-monitor): increase health check timeout and optimize max_t…
Jan 25, 2026
cdf3a25
fix(monitor): dynamic max_tokens for reasoning models
Jan 25, 2026
7b86746
fix(monitor): remove explicit max_tokens to support all models
Jan 25, 2026
97abff3
fix(gemini-cli): reduce thinking budget to avoid capacity exhaustion
Jan 25, 2026
6743f32
fix(gemini-cli): remove limit on maxOutputTokens when not specified
Jan 25, 2026
90ab279
fix(gemini-cli): remove max_tokens from health check to avoid thinkin…
Jan 25, 2026
917826a
feat: Implement the initial dashboard page, including system overview…
Jan 25, 2026
bb22d62
feat: implement dashboard module with caching, parallel data fetching…
Jan 25, 2026
6fd6fe7
feat: add comprehensive styling for the dashboard layout, header, car…
Jan 25, 2026
54e381b
feat: Implement grouped navigation bar styles, main application style…
Jan 25, 2026
79c8fa4
feat: add OpenAI API monitoring and management interface with endpoin…
Jan 25, 2026
00e3a5a
feat: Implement dashboard 2.0 styling with strict alignment and spacing.
Jan 25, 2026
49940e1
feat: 添加 OpenAI API 端点管理功能,包含健康检测、模型列表和账号管理。
Jan 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,10 @@ jobs:
file: ./Dockerfile
platforms: ${{ matrix.platform }}
push: true
# 使用带架构后缀的临时标签
# 同时推送到两个 Registry 的临时架构标签,确保层已就绪,避免合并时跨 Registry 复制层
tags: |
${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:build-${{ github.run_id }}-${{ matrix.suffix }}
${{ (github.event_name != 'pull_request' && secrets.DOCKERHUB_TOKEN != '') && format('{0}:build-{1}-{2}', env.DOCKERHUB_REPO, github.run_id, matrix.suffix) || '' }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha,scope=${{ matrix.platform }}
cache-to: type=gha,mode=max,scope=${{ matrix.platform }}
Expand Down Expand Up @@ -198,10 +199,30 @@ jobs:
TAGS="${{ steps.meta.outputs.tags }}"
echo "$TAGS" | while IFS= read -r TAG; do
[ -z "$TAG" ] && continue
echo "Creating manifest for: $TAG"
docker buildx imagetools create -t "$TAG" \
"${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}@${AMD64_DIGEST}" \
"${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}@${ARM64_DIGEST}"

# 确定当前标签所属的 Registry 以选择同源的源镜像进行合并(避免跨 Registry 复制 Blob 导致 400 错误)
if [[ "$TAG" == "${{ env.REGISTRY }}"* ]]; then
SRC_PREFIX="${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}"
else
SRC_PREFIX="${{ env.DOCKERHUB_REPO }}"
fi

echo "Creating manifest for: $TAG using source: $SRC_PREFIX"

# 增加重试机制,应对 Registry 极端情况下的抖动
for i in {1..3}; do
docker buildx imagetools create -t "$TAG" \
"${SRC_PREFIX}@${AMD64_DIGEST}" \
"${SRC_PREFIX}@${ARM64_DIGEST}" && break || {
if [ $i -lt 3 ]; then
echo "Push failed, retrying in 10s ($i/3)..."
sleep 10
else
echo "Failed to create manifest for $TAG after 3 attempts."
exit 1
fi
}
done
done

- name: Image digest
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ COPY --from=agent-builder --chown=nodejs:nodejs /app/agent-go/agent-windows-amd6
# 4. 复制后端源码 (不包含 node_modules)
COPY --chown=nodejs:nodejs server.js ./
COPY --chown=nodejs:nodejs src ./src
COPY --chown=nodejs:nodejs modules ./modules

ENV NODE_ENV=production \
PORT=3000 \
Expand Down
76 changes: 56 additions & 20 deletions agent-go/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,29 @@ type Collector struct {
lastGPUPower float64
lastGPUTime time.Time

// CPU 采集缓存 (保持上次有效值,避免返回 0)
lastCPUUsage float64
// GPU 采集频率控制
lastGPUMetadataTime time.Time

// CPU 采集缓存
lastCPUTime time.Time
lastCPUUsage float64

// Windows Native (PDH)
pdhQuery uintptr
pdhCounter uintptr

// NVIDIA Native (NVML)
nvmlLib any
nvmlInitialized bool
}

// NewCollector 创建采集器
func NewCollector() *Collector {
return &Collector{
lastNetTime: time.Now(),
lastGPUTime: time.Now().Add(-1 * time.Hour), // 确保第一次采集立即执行
lastCPUTime: time.Now().Add(-1 * time.Hour), // 确保第一次采集立即执行
lastNetTime: time.Now(),
lastGPUTime: time.Now().Add(-1 * time.Hour), // 确保第一次采集立即执行
lastCPUTime: time.Now().Add(-1 * time.Hour), // 确保第一次采集立即执行
lastGPUMetadataTime: time.Now().Add(-1 * time.Hour), // 确保第一次采集立即执行
}
}

Expand Down Expand Up @@ -191,6 +203,7 @@ func (c *Collector) CollectHostInfo() *HostInfo {
gpuModels, gpuMemTotal := c.collectGPUMetadata()
info.GPU = gpuModels
info.GPUMemTotal = gpuMemTotal
c.lastGPUMetadataTime = time.Now()

c.cachedHostInfo = info
return info
Expand Down Expand Up @@ -321,21 +334,27 @@ func (c *Collector) CollectState() *State {
c.lastGPUTime = time.Now()
}

// 补救措施:如果显存总量为 0,尝试重新获取静态信息
// 补救措施:如果显存总量为 0,尝试重新获取静态信息 (增加冷却时间,防止频繁调用 PowerShell)
if c.cachedHostInfo != nil && c.cachedHostInfo.GPUMemTotal == 0 {
go func() {
c.mu.Lock()
defer c.mu.Unlock()
// 再次检查,防止并发重复
if c.cachedHostInfo.GPUMemTotal == 0 {
c.mu.Lock()
shouldRetry := time.Since(c.lastGPUMetadataTime) > 10*time.Minute
if shouldRetry {
c.lastGPUMetadataTime = time.Now() // 预设时间,防止下一秒再次触发
}
c.mu.Unlock()

if shouldRetry {
go func() {
models, total := c.collectGPUMetadata()
if total > 0 {
c.mu.Lock()
c.cachedHostInfo.GPU = models
c.cachedHostInfo.GPUMemTotal = total
c.mu.Unlock()
fmt.Printf("[Collector] GPU metadata refreshed: %d MiB\n", total/1024/1024)
}
}
}()
}()
}
}
state.GPU = c.lastGPUUsage
state.GPUMemUsed = c.lastGPUMemUsed
Expand Down Expand Up @@ -545,6 +564,14 @@ func (c *Collector) collectGPUState() (float64, uint64, float64) {

// 2. 如果没有 NVIDIA,尝试其他方案
if runtime.GOOS == "windows" {
// 如果 meta 数据中已经确认没有 GPU 型号,则不再尝试采集使用率,避免频繁调用 PowerShell
c.mu.Lock()
hasGPU := c.cachedHostInfo != nil && len(c.cachedHostInfo.GPU) > 0
c.mu.Unlock()
if !hasGPU {
return 0, 0, 0
}

// Windows: 使用 Performance Counter 采集所有 GPU
return c.collectGPUStateWindows()
} else if runtime.GOOS == "linux" {
Expand All @@ -555,8 +582,14 @@ func (c *Collector) collectGPUState() (float64, uint64, float64) {
return 0, 0, 0
}

// collectNvidiaGPUState 使用 nvidia-smi 采集 NVIDIA GPU 状态
// collectNvidiaGPUState 使用 NVML (优先) 或 nvidia-smi 采集 NVIDIA GPU 状态
func (c *Collector) collectNvidiaGPUState(nvidiaSmi string) (float64, uint64, float64) {
// 1. 尝试使用原生 NVML API (性能更高,不产生新进程)
if usage, usedMem, power, ok := c.collectNvidiaGPUStateNative(); ok {
return usage, usedMem, power
}

// 2. 回退到 nvidia-smi 命令行工具
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()

Expand Down Expand Up @@ -597,14 +630,19 @@ func (c *Collector) collectNvidiaGPUState(nvidiaSmi string) (float64, uint64, fl
return totalUsage / float64(count), totalUsedMem, totalPower
}

// collectGPUStateWindows Windows 下采集 AMD/Intel GPU 使用率
// 使用 PowerShell 查询 GPU Engine 性能计数器
// collectGPUStateWindows Windows 下采集 AMD/Intel/NVIDIA GPU 使用率
// 优先使用 PDH 性能计数器 API,回退到 PowerShell
func (c *Collector) collectGPUStateWindows() (float64, uint64, float64) {
// 1. 尝试使用原生 PDH API (性能极高,无额外进程)
if usage, ok := c.collectGPUUsagePDH(); ok {
return usage, 0, 0
}

// 2. 回退到 PowerShell (仅在 PDH 失败时使用)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()

// 方案1: 使用 GPU Engine 性能计数器 (Windows 10 1709+)
// 查询所有 GPU 3D 引擎的使用率
psCmd := `
$counters = Get-Counter '\GPU Engine(*engtype_3D)\Utilization Percentage' -ErrorAction SilentlyContinue
if ($counters) {
Expand All @@ -624,9 +662,6 @@ if ($counters) {
}

usage, _ := strconv.ParseFloat(strings.TrimSpace(string(output)), 64)
if usage > 0 {
// fmt.Printf("[Collector] GPU (Win PerfCounter): %.1f%%\n", usage)
}
return usage, 0, 0
}

Expand Down Expand Up @@ -802,6 +837,7 @@ func (c *Collector) collectIntelGPULinux() float64 {
return 0
}


func (c *Collector) getNvidiaSmiPath() string {
if runtime.GOOS == "windows" {
possiblePaths := []string{
Expand Down
14 changes: 14 additions & 0 deletions agent-go/collector_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//go:build !windows

package main

// collectGPUUsagePDH is the placeholder used on non-Windows builds, where the
// PDH performance-counter API does not exist. It never produces a sample:
// usage is always 0 and ok is always false.
func (c *Collector) collectGPUUsagePDH() (usage float64, ok bool) {
	return 0, false
}

// collectNvidiaGPUStateNative is the placeholder used on non-Windows builds.
// No native NVML binding is wired up here, so it always reports ok == false
// with zeroed usage, memory and power values; on Linux the collector currently
// relies on the nvidia-smi command line instead.
func (c *Collector) collectNvidiaGPUStateNative() (usage float64, usedMem uint64, power float64, ok bool) {
	return 0, 0, 0, false
}
137 changes: 137 additions & 0 deletions agent-go/collector_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
//go:build windows

package main

import (
"runtime"
"syscall"
"unsafe"
)

// modPdh is the lazily loaded handle to pdh.dll (Windows Performance Data
// Helper). The DLL is not loaded until one of its procedures is first used.
//
// NOTE(review): the DLL is referenced by bare name, so Windows' normal DLL
// search order applies — confirm that this is acceptable for this agent.
var modPdh = syscall.NewLazyDLL("pdh.dll")

// Lazily resolved PDH entry points used by the GPU usage collector.
var (
	procPdhOpenQuery                = modPdh.NewProc("PdhOpenQueryW")
	procPdhAddEnglishCounter        = modPdh.NewProc("PdhAddEnglishCounterW")
	procPdhCollectQueryData         = modPdh.NewProc("PdhCollectQueryData")
	procPdhGetFormattedCounterValue = modPdh.NewProc("PdhGetFormattedCounterValue")
	procPdhCloseQuery               = modPdh.NewProc("PdhCloseQuery")
)

// pdh_fmt_countervalue_double is the buffer handed to
// PdhGetFormattedCounterValue when a counter is requested in double format:
// a 32-bit status word followed by the 64-bit floating-point value.
type pdh_fmt_countervalue_double struct {
	CStatus     uint32  // per-counter status written by PDH
	DummyStruct [4]byte // explicit padding so DoubleValue sits at an 8-byte offset
	DoubleValue float64 // formatted counter value
}

// collectGPUUsagePDH 使用原生 PDH API 获取所有 GPU 的 3D 引擎平均使用率
func (c *Collector) collectGPUUsagePDH() (float64, bool) {
if runtime.GOOS != "windows" {
return 0, false
}

c.mu.Lock()
defer c.mu.Unlock()

// 初始化查询
if c.pdhQuery == 0 {
var query uintptr
ret, _, _ := procPdhOpenQuery.Call(0, 0, uintptr(unsafe.Pointer(&query)))
if ret != 0 {
return 0, false
}
c.pdhQuery = query

// 添加计数器 (使用通配符获取所有 GPU 的 3D 引擎使用率)
// 使用 English 名称确保兼容性
counterPath := "\\GPU Engine(*engtype_3D)\\Utilization Percentage"
pathPtr, _ := syscall.UTF16PtrFromString(counterPath)
var counter uintptr
ret, _, _ = procPdhAddEnglishCounter.Call(c.pdhQuery, uintptr(unsafe.Pointer(pathPtr)), 0, uintptr(unsafe.Pointer(&counter)))
if ret != 0 {
procPdhCloseQuery.Call(c.pdhQuery)
c.pdhQuery = 0
return 0, false
}
c.pdhCounter = counter

// 第一次采集建立基准
procPdhCollectQueryData.Call(c.pdhQuery)
return 0, true
}

// 执行采集
ret, _, _ := procPdhCollectQueryData.Call(c.pdhQuery)
if ret != 0 {
return 0, false
}

// 获取格式化后的值
var value pdh_fmt_countervalue_double
const PDH_FMT_DOUBLE = 0x00000200
ret, _, _ = procPdhGetFormattedCounterValue.Call(c.pdhCounter, PDH_FMT_DOUBLE, 0, uintptr(unsafe.Pointer(&value)))
if ret != 0 {
return 0, false
}

return value.DoubleValue, true
}

// collectNvidiaGPUStateNative reads NVIDIA GPU state through NVML (nvml.dll)
// on Windows, without spawning nvidia-smi.
//
// All devices reported by NVML are sampled and aggregated the same way as the
// nvidia-smi fallback: utilization is averaged over the sampled devices, used
// memory and power draw are summed. If the device count cannot be obtained,
// only device 0 is sampled.
//
// Returns (usage percent, used memory in bytes, power in watts, true) when at
// least one device yielded a utilization reading. Memory and power are
// best-effort: a device on which those queries fail contributes 0 instead of
// failing the whole sample. Returns ok == false when nvml.dll or a required
// export is missing, NVML initialization fails, or no device could be sampled.
func (c *Collector) collectNvidiaGPUStateNative() (float64, uint64, float64, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.nvmlLib == nil {
		c.nvmlLib = syscall.NewLazyDLL("nvml.dll")
	}
	lib, ok := c.nvmlLib.(*syscall.LazyDLL)
	if !ok {
		return 0, 0, 0, false
	}

	// Initialize NVML once; it stays initialized for the process lifetime.
	if !c.nvmlInitialized {
		initProc := lib.NewProc("nvmlInit_v2")
		// Find fails when nvml.dll or the export is absent (no NVIDIA driver).
		if err := initProc.Find(); err != nil {
			return 0, 0, 0, false
		}
		if ret, _, _ := initProc.Call(); uint32(ret) != 0 {
			return 0, 0, 0, false
		}
		c.nvmlInitialized = true
	}

	// Required entry points. LazyProc.Call panics on a missing export, so
	// every procedure is resolved with Find before it is called.
	getHandle := lib.NewProc("nvmlDeviceGetHandleByIndex_v2")
	getUtil := lib.NewProc("nvmlDeviceGetUtilizationRates")
	if getHandle.Find() != nil || getUtil.Find() != nil {
		return 0, 0, 0, false
	}

	// Optional entry points: their absence only degrades the sample.
	getMem := lib.NewProc("nvmlDeviceGetMemoryInfo")
	hasMem := getMem.Find() == nil
	getPower := lib.NewProc("nvmlDeviceGetPowerUsage")
	hasPower := getPower.Find() == nil

	// Default to a single device when the count is unavailable.
	deviceCount := uint32(1)
	if getCount := lib.NewProc("nvmlDeviceGetCount_v2"); getCount.Find() == nil {
		var n uint32
		if ret, _, _ := getCount.Call(uintptr(unsafe.Pointer(&n))); uint32(ret) == 0 && n > 0 {
			deviceCount = n
		}
	}

	var (
		usageSum   float64
		usedMem    uint64
		powerWatts float64
		sampled    int
	)
	for idx := uint32(0); idx < deviceCount; idx++ {
		var device uintptr
		if ret, _, _ := getHandle.Call(uintptr(idx), uintptr(unsafe.Pointer(&device))); uint32(ret) != 0 {
			continue
		}

		// Layout of nvmlUtilization_t: two 32-bit percentages.
		var util struct {
			GPU    uint32
			Memory uint32
		}
		if ret, _, _ := getUtil.Call(device, uintptr(unsafe.Pointer(&util))); uint32(ret) != 0 {
			continue
		}
		usageSum += float64(util.GPU)
		sampled++

		if hasMem {
			// Layout of nvmlMemory_t: three 64-bit byte counts.
			var mem struct {
				Total uint64
				Free  uint64
				Used  uint64
			}
			if ret, _, _ := getMem.Call(device, uintptr(unsafe.Pointer(&mem))); uint32(ret) == 0 {
				usedMem += mem.Used
			}
		}

		if hasPower {
			// NVML reports power in milliwatts; convert to watts.
			var milliwatts uint32
			if ret, _, _ := getPower.Call(device, uintptr(unsafe.Pointer(&milliwatts))); uint32(ret) == 0 {
				powerWatts += float64(milliwatts) / 1000.0
			}
		}
	}

	if sampled == 0 {
		return 0, 0, 0, false
	}
	return usageSum / float64(sampled), usedMem, powerWatts, true
}
14 changes: 14 additions & 0 deletions modules/antigravity-api/storage.js
Original file line number Diff line number Diff line change
Expand Up @@ -690,10 +690,24 @@ function getStats() {
.get();

const accounts = getAccounts();

// 获取最近 14 天的趋势数据
const dailyTrend = db.prepare(`
SELECT
strftime('%Y-%m-%d', datetime(created_at, 'localtime')) as date,
COUNT(*) as total,
SUM(CASE WHEN status_code = 200 THEN 1 ELSE 0 END) as success
FROM antigravity_logs
WHERE created_at >= datetime('now', '-14 days', 'localtime')
GROUP BY date
ORDER BY date ASC
`).all();

return {
total_calls: stats.total_calls || 0,
success_calls: stats.success_calls || 0,
fail_calls: stats.fail_calls || 0,
daily_trend: dailyTrend || [],
accounts: {
total: accounts.length,
online: accounts.filter(a => a.status === 'online').length,
Expand Down
Loading
Loading