diff --git a/.gitignore b/.gitignore index 2a0f1a1..5ee0999 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +CLAUDE.md + .specstory .idea deploy.sh diff --git a/content_intel.install b/content_intel.install new file mode 100644 index 0000000..d5e1341 --- /dev/null +++ b/content_intel.install @@ -0,0 +1,75 @@ + 'Stores search query logs for content intelligence.', + 'fields' => [ + 'id' => [ + 'description' => 'Primary key.', + 'type' => 'serial', + 'unsigned' => TRUE, + 'not null' => TRUE, + ], + 'keywords' => [ + 'description' => 'The search keywords.', + 'type' => 'varchar', + 'length' => 255, + 'not null' => TRUE, + ], + 'results_count' => [ + 'description' => 'Number of results returned.', + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => TRUE, + 'default' => 0, + ], + 'index_id' => [ + 'description' => 'Optional search index identifier.', + 'type' => 'varchar', + 'length' => 128, + 'not null' => FALSE, + ], + 'timestamp' => [ + 'description' => 'Unix timestamp of the search.', + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => TRUE, + 'default' => 0, + ], + ], + 'primary key' => ['id'], + 'indexes' => [ + 'keywords' => ['keywords'], + 'results_count' => ['results_count'], + 'timestamp' => ['timestamp'], + 'keywords_results' => ['keywords', 'results_count'], + ], + ]; + + return $schema; +} + +/** + * Install the search log table. + */ +function content_intel_update_10001(): void { + $schema = content_intel_schema(); + $database = \Drupal::database(); + + if (!$database->schema()->tableExists('content_intel_search_log')) { + $database->schema()->createTable( + 'content_intel_search_log', + $schema['content_intel_search_log'] + ); + } +} diff --git a/content_intel.services.yml b/content_intel.services.yml index 3ced029..e968a56 100644 --- a/content_intel.services.yml +++ b/content_intel.services.yml @@ -13,3 +13,11 @@ services: - '@renderer' - '@entity_type.bundle.info' - '@file_url_generator' + + content_intel.search_query_collector: + class: Drupal\content_intel\Service\SearchQueryCollector + arguments: + - '@database' + - '@module_handler' + - '@date.formatter' + - '@datetime.time' diff --git a/scripts/run-drupal-check.sh b/scripts/run-drupal-check.sh index c70e4fa..36d6d4f 100755 --- a/scripts/run-drupal-check.sh +++ b/scripts/run-drupal-check.sh @@ -26,6 +26,16 @@ parameters: - web/modules/contrib/content_intel # Set the analysis level (0-9) level: 5 + treatPhpDocTypesAsCertain: false + ignoreErrors: + # Ignore method_exists checks (Drupal pattern for optional features) + - '#Call to function method_exists\(\) .* will always evaluate to true#' + # Ignore new static() in plugin base class (Drupal pattern) + - '#Unsafe usage of new static\(\)#' + # Ignore boolean narrowing warnings + - '#Left side of && is always true#' + # Ignore nullsafe on non-nullable (defensive coding) + - '#Using nullsafe method call on non-nullable type#' EOF mkdir -p web/modules/contrib/ @@ -34,6 +44,9 @@ if [ ! -L "web/modules/contrib/content_intel" ]; then ln -s /src web/modules/contrib/content_intel fi +# Install the statistics module (removed from core in D11). +composer require drupal/statistics --no-interaction + # Install PHPStan extensions for Drupal 11 and Drush for command analysis composer require --dev phpstan/phpstan mglaman/phpstan-drupal phpstan/phpstan-deprecation-rules drush/drush --with-all-dependencies --no-interaction diff --git a/src/Drush/Commands/ContentIntelCommands.php b/src/Drush/Commands/ContentIntelCommands.php index 7bc3b37..c8940e8 100644 --- a/src/Drush/Commands/ContentIntelCommands.php +++ b/src/Drush/Commands/ContentIntelCommands.php @@ -6,6 +6,7 @@ use Consolidation\OutputFormatters\StructuredData\RowsOfFields; use Drupal\content_intel\Service\ContentIntelCollector; +use Drupal\content_intel\Service\SearchQueryCollectorInterface; use Drush\Attributes as CLI; use Drush\Commands\DrushCommands; use Symfony\Component\DependencyInjection\ContainerInterface; @@ -22,9 +23,12 @@ final class ContentIntelCommands extends DrushCommands { * * @param \Drupal\content_intel\Service\ContentIntelCollector $collector * The content intel collector service. + * @param \Drupal\content_intel\Service\SearchQueryCollectorInterface $searchQueryCollector + * The search query collector service. */ public function __construct( protected ContentIntelCollector $collector, + protected SearchQueryCollectorInterface $searchQueryCollector, ) { parent::__construct(); } @@ -34,7 +38,8 @@ public function __construct( */ public static function create(ContainerInterface $container): self { return new static( - $container->get('content_intel.collector') + $container->get('content_intel.collector'), + $container->get('content_intel.search_query_collector') ); } @@ -304,31 +309,111 @@ public function batch( $results = []; if ($options['ids']) { + // Use bulk loading for better performance. $ids = array_map('trim', explode(',', $options['ids'])); - foreach ($ids as $id) { - $entity = $this->collector->loadEntity($entity_type, $id); - if ($entity) { - $results[] = $this->collector->collectIntel($entity, [], $plugins); - } + $entities = $this->collector->loadEntities($entity_type, $ids); + foreach ($entities as $entity) { + $results[] = $this->collector->collectIntel($entity, [], $plugins); } } else { $bundle = $options['bundle'] ?: NULL; - $entities = $this->collector->listEntities( + $entity_summaries = $this->collector->listEntities( $entity_type, $bundle, (int) $options['limit'] ); - foreach ($entities as $entity_summary) { - $entity = $this->collector->loadEntity($entity_type, $entity_summary['id']); - if ($entity) { - $results[] = $this->collector->collectIntel($entity, [], $plugins); - } + // Extract IDs and use bulk loading for better performance. + $ids = array_column($entity_summaries, 'id'); + $entities = $this->collector->loadEntities($entity_type, $ids); + foreach ($entities as $entity) { + $results[] = $this->collector->collectIntel($entity, [], $plugins); } } return $results; } + /** + * List top search queries. + * + * @param array $options + * Command options. + * + * @return \Consolidation\OutputFormatters\StructuredData\RowsOfFields|array + * Search query data. + */ + #[CLI\Command(name: 'ci:searches', aliases: ['cisrc'])] + #[CLI\Option(name: 'limit', description: 'Maximum queries to return (default: 50)')] + #[CLI\Option(name: 'gaps', description: 'Show only content gaps (zero/low result searches)')] + #[CLI\Option(name: 'max-results', description: 'Max results threshold for gaps (default: 0)')] + #[CLI\Option(name: 'format', description: 'Output format: table, json, yaml (default: table)')] + #[CLI\FieldLabels(labels: [ + 'query' => 'Query', + 'count' => 'Count', + 'results_count' => 'Results', + 'last_searched' => 'Last Searched', + ])] + #[CLI\DefaultFields(fields: ['query', 'count', 'results_count'])] + #[CLI\Usage(name: 'drush ci:searches', description: 'List top search queries')] + #[CLI\Usage(name: 'drush ci:searches --gaps', description: 'Show searches with no results (content gaps)')] + #[CLI\Usage(name: 'drush ci:searches --limit=20 --format=json', description: 'Get top 20 queries as JSON')] + public function searches( + array $options = [ + 'limit' => 50, + 'gaps' => FALSE, + 'max-results' => 0, + 'format' => 'table', + ], + ): RowsOfFields|array { + if (!$this->searchQueryCollector->isAvailable()) { + $this->logger()->warning('No search query data source available. Run database updates to create the logging table, or install Search API with logging.'); + return new RowsOfFields([]); + } + + $limit = (int) $options['limit']; + + if ($options['gaps']) { + $queries = $this->searchQueryCollector->getContentGaps($limit, (int) $options['max-results']); + } + else { + $queries = $this->searchQueryCollector->getTopQueries($limit); + } + + // Flatten last_searched for table display. + $rows = array_map(function ($query) { + return [ + 'query' => $query['query'], + 'count' => $query['count'], + 'results_count' => $query['results_count'] ?? 'N/A', + 'last_searched' => $query['last_searched']['human'] ?? 'N/A', + ]; + }, $queries); + + if ($options['format'] === 'json' || $options['format'] === 'yaml') { + return $queries; + } + + return new RowsOfFields($rows); + } + + /** + * Show search query data source status. + * + * @return array + * Status information. + */ + #[CLI\Command(name: 'ci:search-status', aliases: ['ciss'])] + #[CLI\Usage(name: 'drush ci:search-status', description: 'Check search query logging status')] + public function searchStatus(): array { + return [ + 'available' => $this->searchQueryCollector->isAvailable(), + 'source' => $this->searchQueryCollector->getSource(), + 'message' => $this->searchQueryCollector->isAvailable() + ? 'Search query logging is active.' + : 'No search query data source found. Run "drush updb" to create the logging table.', + ]; + } + } diff --git a/src/Plugin/ContentIntel/ContentTranslationPlugin.php b/src/Plugin/ContentIntel/ContentTranslationPlugin.php index 3dea1f7..ab3a43a 100644 --- a/src/Plugin/ContentIntel/ContentTranslationPlugin.php +++ b/src/Plugin/ContentIntel/ContentTranslationPlugin.php @@ -10,6 +10,7 @@ use Drupal\Core\Entity\ContentEntityInterface; use Drupal\Core\Language\LanguageManagerInterface; use Drupal\Core\StringTranslation\TranslatableMarkup; +use Psr\Log\LoggerInterface; use Symfony\Component\DependencyInjection\ContainerInterface; /** @@ -37,6 +38,13 @@ class ContentTranslationPlugin extends ContentIntelPluginBase { */ protected ?LanguageManagerInterface $languageManager = NULL; + /** + * The logger. + * + * @var \Psr\Log\LoggerInterface + */ + protected LoggerInterface $logger; + /** * {@inheritdoc} */ @@ -53,6 +61,7 @@ public static function create( } $instance->languageManager = $container->get('language_manager'); + $instance->logger = $container->get('logger.factory')->get('content_intel'); return $instance; } @@ -143,7 +152,12 @@ public function collect(ContentEntityInterface $entity): array { } } catch (\Exception $e) { - // Metadata not available. + // Log the error but continue gracefully. + $this->logger->warning( + 'Failed to get translation metadata for @langcode: @message', + ['@langcode' => $langcode, '@message' => $e->getMessage()] + ); + $detail['metadata_error'] = $e->getMessage(); } } diff --git a/src/Plugin/ContentIntel/StatisticsPlugin.php b/src/Plugin/ContentIntel/StatisticsPlugin.php index 0e84012..3c4237b 100644 --- a/src/Plugin/ContentIntel/StatisticsPlugin.php +++ b/src/Plugin/ContentIntel/StatisticsPlugin.php @@ -6,6 +6,7 @@ use Drupal\content_intel\Attribute\ContentIntel; use Drupal\content_intel\ContentIntelPluginBase; +use Drupal\Core\Datetime\DateFormatterInterface; use Drupal\Core\Entity\ContentEntityInterface; use Drupal\Core\StringTranslation\TranslatableMarkup; use Drupal\node\NodeInterface; @@ -31,6 +32,13 @@ class StatisticsPlugin extends ContentIntelPluginBase { */ protected ?StatisticsStorageInterface $statisticsStorage = NULL; + /** + * The date formatter. + * + * @var \Drupal\Core\Datetime\DateFormatterInterface + */ + protected DateFormatterInterface $dateFormatter; + /** * {@inheritdoc} */ @@ -45,6 +53,7 @@ public static function create( if ($container->has('statistics.storage.node')) { $instance->statisticsStorage = $container->get('statistics.storage.node'); } + $instance->dateFormatter = $container->get('date.formatter'); return $instance; } @@ -90,7 +99,7 @@ public function collect(ContentEntityInterface $entity): array { 'last_view' => $timestamp ? [ 'timestamp' => $timestamp, 'iso8601' => date('c', $timestamp), - 'human' => \Drupal::service('date.formatter')->format($timestamp, 'medium'), + 'human' => $this->dateFormatter->format($timestamp, 'medium'), ] : NULL, ]; } diff --git a/src/Service/ContentIntelCollector.php b/src/Service/ContentIntelCollector.php index 34ffc17..076a482 100644 --- a/src/Service/ContentIntelCollector.php +++ b/src/Service/ContentIntelCollector.php @@ -17,7 +17,7 @@ /** * Service for collecting content intelligence from various sources. */ -class ContentIntelCollector { +class ContentIntelCollector implements ContentIntelCollectorInterface { /** * Constructs a ContentIntelCollector. @@ -141,6 +141,27 @@ public function loadEntity(string $entity_type_id, int|string $entity_id): ?Cont return $entity instanceof ContentEntityInterface ? $entity : NULL; } + /** + * {@inheritdoc} + */ + public function loadEntities(string $entity_type_id, array $entity_ids): array { + if (empty($entity_ids)) { + return []; + } + $entities = $this->entityTypeManager + ->getStorage($entity_type_id) + ->loadMultiple($entity_ids); + + // Preserve input order and filter to ContentEntityInterface. + $result = []; + foreach ($entity_ids as $id) { + if (isset($entities[$id]) && $entities[$id] instanceof ContentEntityInterface) { + $result[$id] = $entities[$id]; + } + } + return $result; + } + /** * Lists entities matching criteria. * @@ -190,7 +211,9 @@ public function listEntities( $results = []; foreach ($entities as $entity) { - $results[] = $this->getEntitySummary($entity); + if ($entity instanceof ContentEntityInterface) { + $results[] = $this->getEntitySummary($entity); + } } return $results; diff --git a/src/Service/ContentIntelCollectorInterface.php b/src/Service/ContentIntelCollectorInterface.php new file mode 100644 index 0000000..a2be0f8 --- /dev/null +++ b/src/Service/ContentIntelCollectorInterface.php @@ -0,0 +1,135 @@ +getSource() !== 'none'; + } + + /** + * {@inheritdoc} + */ + public function getSource(): string { + if ($this->source !== NULL) { + return $this->source; + } + + // Check for our custom logging table. + if ($this->database->schema()->tableExists('content_intel_search_log')) { + $this->source = 'content_intel'; + return $this->source; + } + + // Check for Search API Saved Searches or similar. + if ($this->moduleHandler->moduleExists('search_api') + && $this->database->schema()->tableExists('search_api_log')) { + $this->source = 'search_api_log'; + return $this->source; + } + + $this->source = 'none'; + return $this->source; + } + + /** + * {@inheritdoc} + */ + public function getTopQueries(int $limit = 50): array { + $source = $this->getSource(); + + if ($source === 'content_intel') { + return $this->getTopQueriesFromContentIntel($limit); + } + + if ($source === 'search_api_log') { + return $this->getTopQueriesFromSearchApiLog($limit); + } + + return []; + } + + /** + * {@inheritdoc} + */ + public function getContentGaps(int $limit = 50, int $max_results = 0): array { + $source = $this->getSource(); + + if ($source === 'content_intel') { + return $this->getContentGapsFromContentIntel($limit, $max_results); + } + + if ($source === 'search_api_log') { + return $this->getContentGapsFromSearchApiLog($limit, $max_results); + } + + return []; + } + + /** + * Gets top queries from content_intel_search_log table. + * + * @param int $limit + * Maximum queries to return. + * + * @return array + * Query data. + */ + protected function getTopQueriesFromContentIntel(int $limit): array { + $query = $this->database->select('content_intel_search_log', 'l') + ->fields('l', ['keywords']) + ->groupBy('l.keywords'); + + $query->addExpression('COUNT(*)', 'count'); + $query->addExpression('AVG(l.results_count)', 'avg_results'); + $query->addExpression('MAX(l.timestamp)', 'last_searched'); + + $query->orderBy('count', 'DESC') + ->range(0, $limit); + + $results = $query->execute()->fetchAll(); + + return array_map(function ($row) { + return [ + 'query' => $row->keywords, + 'count' => (int) $row->count, + 'results_count' => $row->avg_results !== NULL ? (int) round($row->avg_results) : NULL, + 'last_searched' => $row->last_searched ? [ + 'timestamp' => (int) $row->last_searched, + 'iso8601' => date('c', (int) $row->last_searched), + 'human' => $this->dateFormatter->format((int) $row->last_searched, 'medium'), + ] : NULL, + ]; + }, $results); + } + + /** + * Gets content gaps from content_intel_search_log table. + * + * @param int $limit + * Maximum queries to return. + * @param int $max_results + * Maximum result count threshold. + * + * @return array + * Query data for low/no result searches. + */ + protected function getContentGapsFromContentIntel(int $limit, int $max_results): array { + $query = $this->database->select('content_intel_search_log', 'l') + ->fields('l', ['keywords']) + ->groupBy('l.keywords'); + + $query->addExpression('COUNT(*)', 'count'); + $query->addExpression('AVG(l.results_count)', 'avg_results'); + $query->addExpression('MAX(l.timestamp)', 'last_searched'); + + $query->having('AVG(l.results_count) <= :max', [':max' => $max_results]); + $query->orderBy('count', 'DESC') + ->range(0, $limit); + + $results = $query->execute()->fetchAll(); + + return array_map(function ($row) { + return [ + 'query' => $row->keywords, + 'count' => (int) $row->count, + 'results_count' => (int) round($row->avg_results), + 'last_searched' => $row->last_searched ? [ + 'timestamp' => (int) $row->last_searched, + 'iso8601' => date('c', (int) $row->last_searched), + 'human' => $this->dateFormatter->format((int) $row->last_searched, 'medium'), + ] : NULL, + 'is_content_gap' => TRUE, + ]; + }, $results); + } + + /** + * Gets top queries from search_api_log table. + * + * @param int $limit + * Maximum queries to return. + * + * @return array + * Query data. + */ + protected function getTopQueriesFromSearchApiLog(int $limit): array { + // Search API Log module schema may vary. + // This is a common implementation pattern. + if (!$this->database->schema()->fieldExists('search_api_log', 'keywords')) { + return []; + } + + $query = $this->database->select('search_api_log', 'l') + ->fields('l', ['keywords']) + ->groupBy('l.keywords'); + + $query->addExpression('COUNT(*)', 'count'); + $query->addExpression('MAX(l.timestamp)', 'last_searched'); + + $query->orderBy('count', 'DESC') + ->range(0, $limit); + + $results = $query->execute()->fetchAll(); + + return array_map(function ($row) { + return [ + 'query' => $row->keywords, + 'count' => (int) $row->count, + 'results_count' => NULL, + 'last_searched' => $row->last_searched ? [ + 'timestamp' => (int) $row->last_searched, + 'iso8601' => date('c', (int) $row->last_searched), + 'human' => $this->dateFormatter->format((int) $row->last_searched, 'medium'), + ] : NULL, + ]; + }, $results); + } + + /** + * Gets content gaps from search_api_log table. + * + * @param int $limit + * Maximum queries to return. + * @param int $max_results + * Maximum result count threshold. + * + * @return array + * Query data. + */ + protected function getContentGapsFromSearchApiLog(int $limit, int $max_results): array { + // Search API Log may not track result counts. + // Return empty if the field doesn't exist. + if (!$this->database->schema()->fieldExists('search_api_log', 'num_results')) { + return []; + } + + $query = $this->database->select('search_api_log', 'l') + ->fields('l', ['keywords']) + ->groupBy('l.keywords'); + + $query->addExpression('COUNT(*)', 'count'); + $query->addExpression('AVG(l.num_results)', 'avg_results'); + $query->addExpression('MAX(l.timestamp)', 'last_searched'); + + $query->having('AVG(l.num_results) <= :max', [':max' => $max_results]); + $query->orderBy('count', 'DESC') + ->range(0, $limit); + + $results = $query->execute()->fetchAll(); + + return array_map(function ($row) { + return [ + 'query' => $row->keywords, + 'count' => (int) $row->count, + 'results_count' => (int) round($row->avg_results), + 'last_searched' => $row->last_searched ? [ + 'timestamp' => (int) $row->last_searched, + 'iso8601' => date('c', (int) $row->last_searched), + 'human' => $this->dateFormatter->format((int) $row->last_searched, 'medium'), + ] : NULL, + 'is_content_gap' => TRUE, + ]; + }, $results); + } + + /** + * Logs a search query (for sites using content_intel logging). + * + * @param string $keywords + * The search keywords. + * @param int $results_count + * The number of results returned. + * @param string|null $index_id + * Optional search index identifier. + */ + public function logQuery(string $keywords, int $results_count, ?string $index_id = NULL): void { + if (!$this->database->schema()->tableExists('content_intel_search_log')) { + return; + } + + $keywords = trim($keywords); + if (empty($keywords)) { + return; + } + + $this->database->insert('content_intel_search_log') + ->fields([ + 'keywords' => mb_substr($keywords, 0, 255), + 'results_count' => $results_count, + 'index_id' => $index_id, + 'timestamp' => $this->time->getRequestTime(), + ]) + ->execute(); + } + +} diff --git a/src/Service/SearchQueryCollectorInterface.php b/src/Service/SearchQueryCollectorInterface.php new file mode 100644 index 0000000..4507575 --- /dev/null +++ b/src/Service/SearchQueryCollectorInterface.php @@ -0,0 +1,56 @@ +