Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# misc
.DS_Store
*.pem
*.bak

# debug
npm-debug.log*
Expand Down
Binary file added assets/chrome.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 8 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
"react": "18.2.0",
"react-dom": "18.2.0",
"tailwindcss": "3.4.1",
"util": "^0.12.5",
"uuid": "^13.0.0"
"util": "^0.12.5"
},
"devDependencies": {
"@babel/preset-env": "^7.26.9",
Expand Down Expand Up @@ -63,13 +62,18 @@
},
"manifest": {
"permissions": [
"tabs",
"history",
"activeTab",
"scripting",
"storage",
"cookies"
"cookies",
"tabGroups"
],
"host_permissions": [
"https://*/*",
"http://*/*"
]
},
"type": "module"
}
}
328 changes: 328 additions & 0 deletions src/background.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,331 @@
/**
 * Colour names handed to `chrome.tabGroups.update`.
 * Consumers pick an entry by position modulo the list length, so colours
 * repeat once there are more clusters than entries.
 */
const CLUSTER_COLORS = [
  'blue',
  'red',
  'yellow',
  'green',
  'pink',
  'purple',
  'cyan',
  'orange',
  'grey',
] as const;

/**
 * Creates one Chrome tab group per non-empty cluster.
 *
 * Clusters are processed largest-first; each group is titled
 * `"<label> (<tab count>)"`, coloured from `CLUSTER_COLORS` by its position in
 * that ordering, and collapsed when it holds more than five tabs.
 *
 * @param clusters Tuples of `[clusterId, tabIds, label]`. The array is not
 *   modified; ordering is done on a copy.
 * @throws Re-throws any error raised by `chrome.tabs.group` or
 *   `chrome.tabGroups.update` after logging it.
 */
async function createTabGroupsFromClusters(clusters: [number, number[], string][]) {
  try {
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the array owned by the caller.
    const largestFirst = [...clusters].sort((a, b) => b[1].length - a[1].length);

    for (const [clusterIndex, [, tabIds, label]] of largestFirst.entries()) {
      if (tabIds.length === 0) continue;

      // Create a group for this cluster
      const groupId = await chrome.tabs.group({ tabIds });

      // Update group with cluster name and color
      const color = CLUSTER_COLORS[clusterIndex % CLUSTER_COLORS.length];
      await chrome.tabGroups.update(groupId, {
        title: `${label} (${tabIds.length})`,
        color: color,
        collapsed: tabIds.length > 5
      });
    }
  } catch (error) {
    console.error('Error creating tab groups:', error);
    throw error;
  }
}
async function fetchClustersFromMantis(spaceId: string, tabsMap: Map<number, number>): Promise<[number, number[], string][]> {
return new Promise((resolve, reject) => {
try {
const backendUrl = process.env.PLASMO_PUBLIC_MANTIS_API || 'http://localhost:8000';
const wsUrl = backendUrl.replace('http', 'ws') + `/ws/space/${spaceId}/`;

// Connecting to WebSocket

const ws = new WebSocket(wsUrl);
let clustersReceived = false;
let pointsWithClustersReceived = false;
let pointsWithMetadataReceived = false;

const clusterLabels = new Map<string, string>(); // Map cluster ID to label
const pointToClusterMap = new Map<string, string>(); // ← ADD THIS: Map point ID to cluster ID
const clusterGroups = new Map<string, number[]>(); // Map cluster ID to tab IDs


ws.addEventListener('message', (event) => {
const data = JSON.parse(event.data);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The WebSocket message data is parsed into an any type (JSON.parse(event.data)). This undermines the benefits of TypeScript. To improve type safety, code clarity, and maintainability, you should define interfaces for the expected WebSocket message structures and cast the parsed data to that type. For example:

interface Cluster {
  id: string;
  label?: string;
}

interface Point {
  id: string;
  cluster?: string;
  metadata?: {
    tab_id?: string;
  };
}

interface WsMessage {
  type: 'cluster' | 'points' | 'finished';
  clusters?: Cluster[];
  points?: Point[];
}

const data: WsMessage = JSON.parse(event.data);


// Collect cluster labels (only update with real names, not UUIDs)
if (data.type === 'cluster' && data.clusters) {

// Received cluster labels

data.clusters.forEach((cluster: any) => {
const label = cluster.label?.trim();

if (label && !label.startsWith('Cluster ')) {
clusterLabels.set(cluster.id, label);
}
});

clustersReceived = true;
}

// First points message: Get cluster assignments (has cluster field)
if (data.type === 'points' && data.points && data.points[0]?.cluster && !pointsWithClustersReceived) {
// Processing points with cluster assignments

data.points.forEach((point: any) => {
if (point.cluster && point.id) {
pointToClusterMap.set(point.id, point.cluster);
}
});

pointsWithClustersReceived = true;
}

// Later points message: Get tab_id metadata (has metadata.tab_id field)
if (data.type === 'points' && data.points && data.points[0]?.metadata?.tab_id && !pointsWithMetadataReceived) {
// Processing points with tab IDs

data.points.forEach((point: any) => {
const tabId = parseInt(point.metadata.tab_id);
const pointId = point.id;
const clusterId = pointToClusterMap.get(pointId);

if (clusterId && tabId) {
if (!clusterGroups.has(clusterId)) {
clusterGroups.set(clusterId, []);
}
clusterGroups.get(clusterId)!.push(tabId);
}
});

pointsWithMetadataReceived = true;
}

if (data.type === 'finished') {
// WebSocket finished loading data
ws.close();

if (clustersReceived && pointsWithClustersReceived && pointsWithMetadataReceived && clusterGroups.size > 0) {
const result: [number, number[], string][] = Array.from(clusterGroups.entries()).map(
([clusterId, tabIds]) => {
const label = clusterLabels.get(clusterId) || `Cluster ${clusterId}`;
return [clusterId as any, tabIds, label];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The clusterId is cast to any to satisfy the function's return type signature Promise<[number, number[], string][]>. However, clusterId is a string (likely a UUID), not a number. This bypasses type safety and can lead to bugs. The type signature should be updated to reflect the actual data type.

The return type of fetchClustersFromMantis (line 31), the type of clusterGroups (line 146), and the parameter for createTabGroupsFromClusters (line 6) should all be changed from [number, number[], string][] to [string, number[], string][]. Then, this as any cast can be safely removed.

Suggested change
return [clusterId as any, tabIds, label];
return [clusterId, tabIds, label];

}
);

result.sort((a, b) => b[1].length - a[1].length);

resolve(result);
} else {
reject(new Error(`Missing data: clusters=${clustersReceived}, pointsWithClusters=${pointsWithClustersReceived}, pointsWithMetadata=${pointsWithMetadataReceived}, groups=${clusterGroups.size}`));
}
}
});

ws.addEventListener('error', (error) => {
console.error('💥 WebSocket error:', error);
ws.close();
reject(new Error('WebSocket connection failed'));
});


setTimeout(() => {
if (!clustersReceived || !pointsWithClustersReceived || !pointsWithMetadataReceived) {
ws.close();
reject(new Error('WebSocket timeout'));
}
}, 30000);

} catch (error) {
console.error('💥 Error setting up WebSocket:', error);
reject(error);
}
});
}

// Handles the "fetchClusters" action: fetches cluster assignments for
// request.spaceId, turns them into tab groups, and replies with
// { success: true } or { success: false, error }.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  if (request.action === "fetchClusters") {
    fetchClustersFromMantis(request.spaceId, new Map(request.tabsMap))
      .then((clusterGroups) => createTabGroupsFromClusters(clusterGroups))
      .then(() => sendResponse({ success: true }))
      // One handler covers both stages. Rejections that are not Error
      // instances have no `.message`, so stringify them instead of replying
      // with an undefined error.
      .catch((error: unknown) =>
        sendResponse({
          success: false,
          error: error instanceof Error ? error.message : String(error),
        })
      );

    return true; // Keep message channel open for async response
  }

  // Not our action: no asynchronous response will be sent from this listener.
  return false;
});
Comment on lines +143 to +155
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

You are adding multiple chrome.runtime.onMessage.addListener calls throughout this file. Registering several listeners does not overwrite the earlier ones — every registered listener is invoked for every message — but only the first sendResponse call is delivered to the sender, and each listener has to decide independently whether to return true to keep the channel open, which makes handlers for actions like fetchClusters easy to get subtly wrong. All message handling logic should be consolidated into a single onMessage listener, using a switch statement or an if/else if chain on request.action to delegate to the correct handler.



// This is used to get all tabs in the browser, and some of their content
// Add timeout wrapper function
/**
 * Resolves with the outcome of `promise`, or with `defaultValue` if `promise`
 * has not settled within `timeoutMs` milliseconds.
 *
 * @param promise The operation to wait for.
 * @param timeoutMs How long to wait before falling back.
 * @param defaultValue Value to resolve with on timeout.
 * @returns The promise's value, or `defaultValue` on timeout. A rejection of
 *   `promise` that happens before the timeout is propagated.
 */
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number, defaultValue: T): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const fallback = new Promise<T>((resolve) => {
    timer = setTimeout(() => resolve(defaultValue), timeoutMs);
  });

  try {
    return await Promise.race([promise, fallback]);
  } finally {
    // Cancel the fallback timer once the race is decided so it does not stay
    // pending for the rest of timeoutMs.
    clearTimeout(timer);
  }
}

// Handles the "getTabsWithContent" action: queries every tab, tries to read a
// short text summary from each one via getPageContent, and replies with
// { tabs } (each tab object plus a pageContent string) or { error }.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  if (request.action !== "getTabsWithContent") {
    return false;
  }

  // Catch variables are unknown; only Error instances carry `.message`.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Hostname for placeholder text. Never throws: a missing or unparsable URL
  // yields 'unknown site' so building a fallback message cannot fail.
  const siteName = (url: string | undefined): string => {
    if (!url) return 'unknown site';
    try {
      return new URL(url).hostname;
    } catch {
      return 'unknown site';
    }
  };

  chrome.tabs.query({}, async (tabs) => {
    if (chrome.runtime.lastError) {
      sendResponse({ error: chrome.runtime.lastError.message });
      return;
    }

    try {
      // Process tabs in parallel
      const tabsWithContent = await Promise.all(
        tabs.map(async (tab) => {
          const tabData = { ...tab, pageContent: '' };

          try {
            if (tab.id && tab.url && !tab.url.startsWith('chrome://') && !tab.url.startsWith('chrome-extension://')) {
              // Add 5 second timeout per tab
              const results = await withTimeout(
                chrome.scripting.executeScript({
                  target: { tabId: tab.id },
                  func: getPageContent,
                }),
                5000, // 5 second timeout
                null
              );

              if (results && results[0] && results[0].result) {
                tabData.pageContent = results[0].result;
              } else {
                tabData.pageContent = `Content from ${siteName(tab.url)} - timed out`;
              }
            }
          } catch (error) {
            console.warn(`⚠️ Could not get content for tab ${tab.id}:`, describeError(error));
            tabData.pageContent = `Content from ${siteName(tab.url)} - unable to read page content`;
          }

          return tabData;
        })
      );

      sendResponse({ tabs: tabsWithContent });
    } catch (error) {
      // Always answer: the listener returned true, so the sender is waiting
      // on this channel.
      sendResponse({ error: describeError(error) });
    }
  });

  return true; // Keep message channel open for async response
});

// This is for filtering text nodes in the page content extraction
// It feels less appropriate to have this logic here,
// but this function was long enough to warrant its own helper
// Text-node filter used by the page content extraction.
// Rejects a node when it has no parent element, when the parent's tag is in
// excludedTags, when the parent's computed style hides it, or when the trimmed
// text is shorter than minTextLength; accepts it otherwise.
const acceptNode = (node, excludedTags = ['script', 'style', 'noscript', 'iframe', 'object'], minTextLength = 3) => {
  const container = node.parentElement;
  if (!container) {
    return NodeFilter.FILTER_REJECT;
  }

  // Non-visible content such as scripts and styles
  const isExcludedTag = excludedTags.includes(container.tagName.toLowerCase());
  if (isExcludedTag) {
    return NodeFilter.FILTER_REJECT;
  }

  // Parent hidden via CSS
  const { display, visibility } = window.getComputedStyle(container);
  if (display === 'none' || visibility === 'hidden') {
    return NodeFilter.FILTER_REJECT;
  }

  // Keep only text nodes with meaningful content
  const meaningfulLength = (node.textContent?.trim() || '').length;
  return meaningfulLength < minTextLength
    ? NodeFilter.FILTER_REJECT
    : NodeFilter.FILTER_ACCEPT;
};

// This gets the page content from a tab.
function getPageContent() {
try {
const title = document.title || '';
const url = window.location.href;
const domain = window.location.hostname;

// Get ALL visible text from the page
let allText = '';

// Method 1: Try to get all text from body
if (document.body) {
// Get all text content, which automatically excludes HTML tags
allText = document.body.innerText || document.body.textContent || '';
}

// If body approach fails, try document-wide text extraction
if (!allText || allText.length < 100) {
// Get all text nodes in the document
const walker = document.createTreeWalker(
document.body || document.documentElement,
NodeFilter.SHOW_TEXT,
{ acceptNode }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

acceptNode is called here, but it's defined in the background script's scope. This function getPageContent is executed in the tab's context where acceptNode will be undefined, causing a ReferenceError. The acceptNode function definition should be moved inside getPageContent to ensure it's in the correct scope when the script is executed.

);

const textNodes = [];
let node;
while (node = walker.nextNode()) {
const text = node.textContent?.trim();
if (text && text.length > 2) {
textNodes.push(text);
}
}

allText = textNodes.join(' ');
}

// Clean up the text
allText = allText
.replace(/\s+/g, ' ') // Replace multiple whitespace with single space
.trim();

// Take a reasonable sample of the text (first 300 chars)
const textSample = allText.substring(0, 300);

// Combine title and text content
let result = '';
if (title && title.trim()) {
result += `${title.trim()}. `;
}

if (textSample && textSample.length > 10) {
// Remove title from content if it's repeated
let contentText = textSample;
if (title && textSample.toLowerCase().startsWith(title.toLowerCase())) {
contentText = textSample.substring(title.length).trim();
if (contentText.startsWith('.') || contentText.startsWith('-')) {
contentText = contentText.substring(1).trim();
}
}

if (contentText.length > 10) {
result += contentText;
}
}

// Generic fallback if no meaningful content found
if (!result.trim() || result.trim().length < 20) {
result = `Content from ${domain} - ${title || url.split('/').pop() || 'webpage'}`;
}

return result || `Page from ${domain}`;

} catch (error) {
console.error('Error extracting page content:', error);

// Simple fallback
const domain = window.location.hostname;
const title = document.title || '';

return title || `Content from ${domain}`;
}
}

// This is used to register cookies in the browser
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
if (request.action === "setCookie") {
Expand Down
3 changes: 2 additions & 1 deletion src/connection_manager.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ import { GoogleScholarConnection } from "./connections/googleScholar/connection"
import { WikipediaSegmentConnection } from "./connections/wikipediaSegment/connection";
import { GmailConnection } from "./connections/Gmail/connection";
import { LinkedInConnection } from "./connections/Linkedin/connection";
import { ChromeTabsConnection } from "./connections/chromeTabs/connection";


export const CONNECTIONS = [GmailConnection, WikipediaSegmentConnection, WikipediaReferencesConnection, GoogleConnection, PubmedConnection, GoogleDocsConnection, GoogleScholarConnection,LinkedInConnection];
export const CONNECTIONS = [GmailConnection, WikipediaSegmentConnection, WikipediaReferencesConnection, GoogleConnection, PubmedConnection, GoogleDocsConnection, GoogleScholarConnection,LinkedInConnection, ChromeTabsConnection];

export const searchConnections = (url: string, ) => {
const connections = CONNECTIONS.filter(connection => connection.trigger(url));
Expand Down
Loading