
Commit 6d1daf3

Merge pull request #1 from tigillo/rate-limit-headers

Add rate limit headers, token usage tracking, and improve error handling

2 parents 5093866 + 832b2f0, commit 6d1daf3

File tree: 6 files changed (+110, -20 lines)

README.md (21 additions & 6 deletions)
@@ -11,6 +11,8 @@ It allows you to list models, perform chat/inference completions, and supports s
 
 - List available models in the GitHub Models catalog
 - Create chat completions (like OpenAI’s `ChatCompletion`)
+- Rate limit tracking (headers parsed automatically)
+- Token usage tracking (prompt, completion, total)
 - Optional streaming support for real-time responses
 - Supports organization-scoped endpoints
 - Easy-to-use Go client interface
@@ -25,15 +27,17 @@ go get github.com/tigillo/githubmodels-go
 
 ## Usage
 ### Initialize Client
-```
+```go
 package main
 
 import (
     "context"
     "fmt"
     "os"
+    "time"
 
     githubmodels "github.com/tigillo/githubmodels-go/client"
+    "github.com/tigillo/githubmodels-go/models"
 )
 
 func main() {
@@ -43,30 +47,41 @@ func main() {
     ctx := context.Background()
 
     // Example: list models
-    models, err := client.ListModels(ctx)
+    modelsList, err := client.ListModels(ctx)
     if err != nil {
         panic(err)
     }
 
-    for _, m := range models {
+    for _, m := range modelsList {
         fmt.Println(m.ID, "-", m.Description)
     }
 }
 ```
 
 ### Create Chat Completion
-```
-resp, err := client.ChatCompletion(ctx, githubmodels.ChatRequest{
+```go
+resp, err := client.ChatCompletion(ctx, models.ChatRequest{
     Model: "github/code-chat",
-    Messages: []githubmodels.Message{
+    Messages: []models.Message{
         {Role: "user", Content: "Write a Go function to reverse a string"},
     },
 })
+
+// Check for rate limit info even on error
+if resp != nil && resp.RateLimit.Limit > 0 {
+    fmt.Printf("Rate Limit: %d/%d remaining\n", resp.RateLimit.Remaining, resp.RateLimit.Limit)
+    fmt.Printf("Resets at: %s\n", time.Unix(resp.RateLimit.Reset, 0))
+}
+
 if err != nil {
     panic(err)
 }
 
 fmt.Println(resp.Choices[0].Message.Content)
+
+// Check token usage
+fmt.Printf("Token Usage: %d prompt + %d completion = %d total\n",
+    resp.Usage.PromptTokens, resp.Usage.CompletionTokens, resp.Usage.TotalTokens)
 ```
 
 ## Environment Variables
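
Note on the new README example: when a request is rejected with HTTP 429, the parsed `Retry-After` value can drive a simple backoff before retrying. The helper below is a hedged sketch built only on the `models.RateLimitInfo` fields added in this commit; `waitForRateLimit` is a hypothetical name, not part of the library.

```go
package main

import (
	"fmt"
	"time"

	"github.com/tigillo/githubmodels-go/models"
)

// waitForRateLimit is a hypothetical helper (not part of this commit) that
// sleeps until the rate limit window should allow another request.
func waitForRateLimit(info models.RateLimitInfo) {
	switch {
	case info.RetryAfter > 0:
		// Retry-After is only set on 429 responses and is given in seconds.
		time.Sleep(time.Duration(info.RetryAfter) * time.Second)
	case info.Remaining == 0 && info.Reset > 0:
		// Otherwise wait until the window resets (Reset is a Unix timestamp).
		if d := time.Until(time.Unix(info.Reset, 0)); d > 0 {
			time.Sleep(d)
		}
	}
}

func main() {
	// Illustrative values only.
	waitForRateLimit(models.RateLimitInfo{RetryAfter: 2})
	fmt.Println("retrying now")
}
```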

client/client.go (32 additions & 0 deletions)
@@ -6,6 +6,7 @@ import (
     "encoding/json"
     "fmt"
     "net/http"
+    "strconv"
 
     "github.com/tigillo/githubmodels-go/models"
 )
@@ -95,3 +96,34 @@ func (c *Client) ChatCompletion(ctx context.Context, reqData models.ChatRequest)
 
     return &chatResp, nil
 }
+
+// ParseRateLimitHeaders extracts rate limit information from HTTP headers
+func ParseRateLimitHeaders(headers http.Header) models.RateLimitInfo {
+    info := models.RateLimitInfo{}
+
+    if limit := headers.Get("X-RateLimit-Limit"); limit != "" {
+        if val, err := strconv.Atoi(limit); err == nil {
+            info.Limit = val
+        }
+    }
+
+    if remaining := headers.Get("X-RateLimit-Remaining"); remaining != "" {
+        if val, err := strconv.Atoi(remaining); err == nil {
+            info.Remaining = val
+        }
+    }
+
+    if reset := headers.Get("X-RateLimit-Reset"); reset != "" {
+        if val, err := strconv.ParseInt(reset, 10, 64); err == nil {
+            info.Reset = val
+        }
+    }
+
+    if retryAfter := headers.Get("Retry-After"); retryAfter != "" {
+        if val, err := strconv.Atoi(retryAfter); err == nil {
+            info.RetryAfter = val
+        }
+    }
+
+    return info
+}
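
For reference, a minimal check of `ParseRateLimitHeaders` against a hand-built `http.Header` could look like the sketch below (a test sketch, not part of this commit; `http.Header.Set` and `Get` both canonicalize keys, so the header names match as written):

```go
package client_test

import (
	"net/http"
	"testing"

	"github.com/tigillo/githubmodels-go/client"
)

func TestParseRateLimitHeaders(t *testing.T) {
	// Build a header set the way a server response would populate it.
	h := make(http.Header)
	h.Set("X-RateLimit-Limit", "60")
	h.Set("X-RateLimit-Remaining", "59")
	h.Set("X-RateLimit-Reset", "1700000000")
	h.Set("Retry-After", "30")

	info := client.ParseRateLimitHeaders(h)
	if info.Limit != 60 || info.Remaining != 59 || info.Reset != 1700000000 || info.RetryAfter != 30 {
		t.Fatalf("unexpected parse result: %+v", info)
	}
}
```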

client/http.go (16 additions & 8 deletions)
@@ -9,22 +9,22 @@ import (
     "net/http"
 )
 
-// doRequest is a helper to make HTTP requests to GitHub Models API
-func (c *Client) DoRequest(ctx context.Context, method, path string, body interface{}, result interface{}) error {
+// DoRequest is a helper to make HTTP requests to GitHub Models API
+func (c *Client) DoRequest(ctx context.Context, method, path string, body interface{}, result interface{}) (http.Header, error) {
     url := fmt.Sprintf("%s%s", c.BaseURL, path)
 
     var bodyReader io.Reader
     if body != nil {
         b, err := json.Marshal(body)
         if err != nil {
-            return err
+            return nil, err
         }
         bodyReader = bytes.NewReader(b)
     }
 
     req, err := http.NewRequestWithContext(ctx, method, url, bodyReader)
     if err != nil {
-        return err
+        return nil, err
     }
 
     req.Header.Set("Authorization", "Bearer "+c.token)
@@ -35,21 +35,29 @@ func (c *Client) DoRequest(ctx context.Context, method, path string, body interf
 
     resp, err := c.Client.Do(req)
     if err != nil {
-        return err
+        return nil, err
     }
     defer resp.Body.Close()
 
+    // Extract only relevant headers
+    headers := make(http.Header)
+    for k, v := range resp.Header {
+        if k == "X-RateLimit-Limit" || k == "X-RateLimit-Remaining" || k == "X-RateLimit-Reset" || k == "Retry-After" {
+            headers[k] = v
+        }
+    }
+
     if resp.StatusCode < 200 || resp.StatusCode >= 300 {
         // Read response body for error message
         respBody, _ := io.ReadAll(resp.Body)
-        return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody))
+        return headers, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody))
     }
 
     if result != nil {
         if err := json.NewDecoder(resp.Body).Decode(result); err != nil {
-            return err
+            return headers, err
         }
     }
 
-    return nil
+    return headers, nil
 }
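
One detail to be aware of: `net/http` stores received response header keys in canonical MIME form (e.g. `X-Ratelimit-Limit` rather than `X-RateLimit-Limit`), so the exact string comparison in the extraction loop above may not match the rate limit headers as they appear in `resp.Header`. A hedged sketch of a variant that copies through `Get`/`Set`, which canonicalize keys on both sides, is shown below; `relevantHeaders` is a hypothetical helper, not part of this commit.

```go
package client

import "net/http"

// relevantHeaders is an illustrative, case-insensitive way to copy the rate
// limit headers: http.Header.Get and Set both canonicalize the key, so the
// original casing sent by the server does not matter.
func relevantHeaders(src http.Header) http.Header {
	keep := []string{"X-RateLimit-Limit", "X-RateLimit-Remaining", "X-RateLimit-Reset", "Retry-After"}
	headers := make(http.Header)
	for _, k := range keep {
		if v := src.Get(k); v != "" {
			headers.Set(k, v)
		}
	}
	return headers
}
```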

endpoints/catalog.go (1 addition & 1 deletion)
@@ -9,7 +9,7 @@ import (
 // ListModels fetches all available models from GitHub Models catalog
 func ListModels(ctx context.Context, c *client.Client) ([]client.Model, error) {
     var models []client.Model
-    err := c.DoRequest(ctx, "GET", "/catalog/models", nil, &models)
+    _, err := c.DoRequest(ctx, "GET", "/catalog/models", nil, &models)
     if err != nil {
         return nil, err
     }

endpoints/inference.go (20 additions & 2 deletions)
@@ -10,8 +10,18 @@ import (
 // ChatCompletion sends a chat request to the GitHub Models API
 func ChatCompletion(ctx context.Context, c *client.Client, req models.ChatRequest) (*models.ChatResponse, error) {
     var resp models.ChatResponse
-    err := c.DoRequest(ctx, "POST", "/inference/chat/completions", req, &resp)
+    headers, err := c.DoRequest(ctx, "POST", "/inference/chat/completions", req, &resp)
+
+    // Always attach headers if available, even on error
+    if headers != nil {
+        resp.RateLimit = client.ParseRateLimitHeaders(headers)
+    }
+
     if err != nil {
+        // If we have headers (rate limits), return the partial response with the error
+        if headers != nil {
+            return &resp, err
+        }
         return nil, err
     }
     return &resp, nil
@@ -21,8 +31,16 @@ func ChatCompletion(ctx context.Context, c *client.Client, req models.ChatReques
 func OrgChatCompletion(ctx context.Context, c *client.Client, org string, req models.ChatRequest) (*models.ChatResponse, error) {
     path := "/orgs/" + org + "/inference/chat/completions"
     var resp models.ChatResponse
-    err := c.DoRequest(ctx, "POST", path, req, &resp)
+    headers, err := c.DoRequest(ctx, "POST", path, req, &resp)
+
+    if headers != nil {
+        resp.RateLimit = client.ParseRateLimitHeaders(headers)
+    }
+
     if err != nil {
+        if headers != nil {
+            return &resp, err
+        }
         return nil, err
     }
     return &resp, nil
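
The effect of returning the partial response alongside the error is that callers can still read the parsed rate limit state after a failed request. A minimal sketch of such a caller (hypothetical; it assumes the package in `endpoints/` is imported as `endpoints`, and that an initialized `*client.Client` is supplied by the surrounding code):

```go
package example

import (
	"context"
	"fmt"
	"time"

	"github.com/tigillo/githubmodels-go/client"
	"github.com/tigillo/githubmodels-go/endpoints"
	"github.com/tigillo/githubmodels-go/models"
)

// inspectRateLimit is a hypothetical caller (not part of this commit) showing the
// new contract: on error, a non-nil *models.ChatResponse may still carry the
// rate limit values parsed from the response headers.
func inspectRateLimit(ctx context.Context, c *client.Client) {
	resp, err := endpoints.ChatCompletion(ctx, c, models.ChatRequest{
		Model:    "github/code-chat",
		Messages: []models.Message{{Role: "user", Content: "ping"}},
	})
	if err != nil {
		if resp != nil {
			fmt.Printf("request failed; %d/%d requests left, window resets at %s\n",
				resp.RateLimit.Remaining, resp.RateLimit.Limit, time.Unix(resp.RateLimit.Reset, 0))
		}
		return
	}
	fmt.Println(resp.Choices[0].Message.Content)
}
```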

models/inference.go (20 additions & 3 deletions)
@@ -17,9 +17,26 @@ type Choice struct {
     Message Message `json:"message"` // The generated message from the model
 }
 
+// RateLimitInfo contains rate limit information from GitHub API response headers
+type RateLimitInfo struct {
+    Limit      int   // X-RateLimit-Limit: Maximum requests per hour
+    Remaining  int   // X-RateLimit-Remaining: Requests remaining in current window
+    Reset      int64 // X-RateLimit-Reset: Unix timestamp when the limit resets
+    RetryAfter int   // Retry-After: Seconds to wait before retrying (only on 429)
+}
+
+// Usage contains token usage information from the API response
+type Usage struct {
+    PromptTokens     int `json:"prompt_tokens"`
+    CompletionTokens int `json:"completion_tokens"`
+    TotalTokens      int `json:"total_tokens"`
+}
+
 // ChatResponse represents the response from the chat completion endpoint
 type ChatResponse struct {
-    ID      string   `json:"id"`      // Response ID
-    Object  string   `json:"object"`  // Type of object, e.g., "chat.completion"
-    Choices []Choice `json:"choices"` // List of choices
+    ID        string        `json:"id"`      // Response ID
+    Object    string        `json:"object"`  // Type of object, e.g., "chat.completion"
+    Choices   []Choice      `json:"choices"` // List of choices
+    Usage     Usage         `json:"usage"`   // Token usage information
+    RateLimit RateLimitInfo // Rate limit information from response headers
 }
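
Since `RateLimit` has no JSON tag and is filled from headers rather than the response body, only `Usage` comes from the decoded payload. A quick decoding sketch; the JSON values are made up for illustration and are not taken from a real API response:

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/tigillo/githubmodels-go/models"
)

func main() {
	// Illustrative payload only.
	body := []byte(`{
		"id": "chatcmpl-123",
		"object": "chat.completion",
		"choices": [{"message": {"role": "assistant", "content": "hi"}}],
		"usage": {"prompt_tokens": 12, "completion_tokens": 5, "total_tokens": 17}
	}`)

	var resp models.ChatResponse
	if err := json.Unmarshal(body, &resp); err != nil {
		panic(err)
	}

	// Usage is decoded from the body; RateLimit stays zero until the client
	// fills it in from the response headers.
	fmt.Println(resp.Usage.TotalTokens) // 17
	fmt.Println(resp.RateLimit.Limit)   // 0
}
```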
