Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,25 @@ arxiv-cli config get headless
arxiv-cli config path
```

### Chrome Arguments
For Docker/devcontainer environments, you may need to pass additional Chrome flags:

```json
{
"browser_path": "/usr/bin/google-chrome",
"chrome_args": [
"--no-sandbox",
"--disable-setuid-sandbox",
"--disable-gpu"
]
}
```

**Note**: When the `CI` environment variable is set, the following flags are automatically added:
- `--disable-gpu`
- `--no-sandbox`
- `--disable-setuid-sandbox`

## Implementation Details
- **Stack**: Rust, Clap, Custom CDP Client (`tokio-tungstenite`), Serde, Reqwest, PDF-Extract, `mcp-sdk-rs`.
- **Search Scraping**: Uses a custom Chrome DevTools Protocol (CDP) client to handle dynamic search results loaded via JS.
Expand Down
14 changes: 13 additions & 1 deletion src/core/cdp/browser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,21 @@ impl BrowserManager {
return Ok(Arc::clone(browser));
}

let args = vec![
// Start with the built-in Chrome args (a fixed desktop user-agent string)
let mut args = vec![
"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
];

// Add custom Chrome args from config
args.extend(self.config.chrome_args.iter().map(|s| s.as_str()));

// When the `CI` environment variable is set, automatically add GPU- and sandbox-disabling flags
if std::env::var("CI").is_ok() {
args.push("--disable-gpu");
args.push("--no-sandbox");
args.push("--disable-setuid-sandbox");
}

let browser_path = self.config.browser_path.as_ref().map(PathBuf::from);

let browser =
Expand Down
11 changes: 9 additions & 2 deletions src/core/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ pub struct Config {
pub headless: bool,
#[serde(default)]
pub browser_path: Option<String>,
#[serde(default)]
pub chrome_args: Vec<String>,
}

fn default_headless() -> bool {
Expand All @@ -18,7 +20,7 @@ fn default_headless() -> bool {

impl Default for Config {
fn default() -> Self {
Self { headless: true, browser_path: None }
Self { headless: true, browser_path: None, chrome_args: Vec::new() }
}
}

Expand Down Expand Up @@ -107,6 +109,7 @@ mod tests {
let config = Config::default();
assert!(config.headless);
assert!(config.browser_path.is_none());
assert!(config.chrome_args.is_empty());
}

#[test]
Expand Down Expand Up @@ -185,7 +188,11 @@ mod tests {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("config.json");

let config = Config { headless: false, browser_path: Some("/custom/path".to_string()) };
let config = Config {
headless: false,
browser_path: Some("/custom/path".to_string()),
chrome_args: vec!["--no-sandbox".to_string(), "--disable-gpu".to_string()],
};
config.save_to(&path).unwrap();

let loaded = Config::load_from(&path).unwrap();
Expand Down