diff --git a/.gitignore b/.gitignore
index dd59c1a..1ac12f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,5 @@ composer.phar
composer.lock
vagrant_ansible_inventory_default
Vagrantfile
-.DS_Store
\ No newline at end of file
+.DS_Store
+/var/cache/
\ No newline at end of file
diff --git a/composer.json b/composer.json
index 3fa31b9..40053a6 100644
--- a/composer.json
+++ b/composer.json
@@ -13,8 +13,8 @@
}
],
"require": {
- "php": ">=5.5.9"
- },
+ "php": ">=8.3"
+ },
"autoload": {
"psr-4": {
"Sentiment\\": "src/"
@@ -24,5 +24,32 @@
"psr-4": {
"Sentiment\\Tests\\": "tests/"
}
+ },
+ "require-dev": {
+ "rector/rector": "^2.1",
+ "phpstan/phpstan": "^2.1",
+ "pestphp/pest": "*",
+ "laravel/pint": "^1.25"
+ },
+ "scripts": {
+ "rector": "vendor/bin/rector process",
+ "rector-dry": "vendor/bin/rector process --dry-run",
+ "phpstan": "vendor/bin/phpstan analyse",
+ "phpstan-baseline": "vendor/bin/phpstan analyse --generate-baseline",
+ "test": "vendor/bin/pest",
+ "test-coverage": "vendor/bin/pest --coverage",
+ "pint": "vendor/bin/pint",
+ "pint-test": "vendor/bin/pint --test",
+ "ready": [
+ "@rector",
+ "@pint",
+ "@phpstan",
+ "@test"
+ ]
+ },
+ "config": {
+ "allow-plugins": {
+ "pestphp/pest-plugin": true
+ }
}
-}
\ No newline at end of file
+}
diff --git a/phpstan.neon b/phpstan.neon
new file mode 100644
index 0000000..3f85672
--- /dev/null
+++ b/phpstan.neon
@@ -0,0 +1,6 @@
+parameters:
+ level: 5
+ paths:
+ - src
+ tmpDir: var/cache/phpstan
+ reportUnmatchedIgnoredErrors: false
\ No newline at end of file
diff --git a/phpunit.xml b/phpunit.xml
new file mode 100644
index 0000000..e6198e0
--- /dev/null
+++ b/phpunit.xml
@@ -0,0 +1,18 @@
+
+
+
+
+ ./tests
+
+
+
+
+ app
+ src
+
+
+
diff --git a/pint.json b/pint.json
new file mode 100644
index 0000000..156df32
--- /dev/null
+++ b/pint.json
@@ -0,0 +1,110 @@
+{
+ "preset": "psr12",
+ "exclude": [
+ "vendor",
+ "var"
+ ],
+ "rules": {
+ "array_syntax": {
+ "syntax": "short"
+ },
+ "binary_operator_spaces": {
+ "default": "single_space"
+ },
+ "blank_line_after_namespace": true,
+ "blank_line_after_opening_tag": true,
+ "blank_line_before_statement": {
+ "statements": ["return"]
+ },
+ "braces": {
+ "allow_single_line_closure": true,
+ "position_after_functions_and_oop_constructs": "next",
+ "position_after_control_structures": "same",
+ "position_after_anonymous_constructs": "same"
+ },
+ "cast_spaces": {
+ "space": "single"
+ },
+ "class_attributes_separation": {
+ "elements": {
+ "method": "one"
+ }
+ },
+ "concat_space": {
+ "spacing": "none"
+ },
+ "declare_equal_normalize": true,
+ "function_typehint_space": true,
+ "single_line_comment_style": {
+ "comment_types": ["hash"]
+ },
+ "include": true,
+ "lowercase_cast": true,
+ "lowercase_static_reference": true,
+ "magic_constant_casing": true,
+ "method_argument_space": {
+ "on_multiline": "ensure_fully_multiline"
+ },
+ "method_chaining_indentation": true,
+ "no_blank_lines_after_class_opening": true,
+ "no_blank_lines_after_phpdoc": true,
+ "no_empty_phpdoc": true,
+ "no_empty_statement": true,
+ "no_extra_blank_lines": {
+ "tokens": [
+ "extra",
+ "throw",
+ "use"
+ ]
+ },
+ "no_leading_import_slash": true,
+ "no_leading_namespace_whitespace": true,
+ "no_mixed_echo_print": true,
+ "no_multiline_whitespace_around_double_arrow": true,
+ "no_short_bool_cast": true,
+ "no_singleline_whitespace_before_semicolons": true,
+ "no_spaces_around_offset": true,
+ "no_trailing_comma_in_singleline": true,
+ "no_unneeded_control_parentheses": true,
+ "no_unneeded_curly_braces": true,
+ "no_unused_imports": true,
+ "no_whitespace_before_comma_in_array": true,
+ "no_whitespace_in_blank_line": true,
+ "normalize_index_brace": true,
+ "object_operator_without_whitespace": true,
+ "ordered_imports": {
+ "sort_algorithm": "alpha"
+ },
+ "php_unit_fqcn_annotation": true,
+ "phpdoc_align": {
+ "align": "vertical"
+ },
+ "phpdoc_annotation_without_dot": true,
+ "phpdoc_indent": true,
+ "phpdoc_inline_tag_normalizer": true,
+ "phpdoc_no_access": true,
+ "phpdoc_no_alias_tag": true,
+ "phpdoc_no_package": true,
+ "phpdoc_no_useless_inheritdoc": true,
+ "phpdoc_return_self_reference": true,
+ "phpdoc_scalar": true,
+ "phpdoc_single_line_var_spacing": true,
+ "phpdoc_trim": true,
+ "phpdoc_types": true,
+ "phpdoc_var_without_name": true,
+ "return_type_declaration": true,
+ "single_class_element_per_statement": true,
+ "single_import_per_statement": true,
+ "single_line_after_imports": true,
+ "single_quote": true,
+ "space_after_semicolon": {
+ "remove_in_empty_for_expressions": true
+ },
+ "standardize_not_equals": true,
+ "ternary_operator_spaces": true,
+ "trailing_comma_in_multiline": true,
+ "trim_array_spaces": true,
+ "unary_operator_spaces": true,
+ "whitespace_after_comma_in_array": true
+ }
+}
\ No newline at end of file
diff --git a/rector.php b/rector.php
new file mode 100644
index 0000000..7f08411
--- /dev/null
+++ b/rector.php
@@ -0,0 +1,31 @@
+withPaths([
+ __DIR__.'/src',
+ ])
+ ->withPhpSets(
+ php83: true
+ )
+ ->withSets([
+ // Apply all PHP version upgrades up to PHP 8.3
+ LevelSetList::UP_TO_PHP_83,
+
+ // Code quality improvements
+ SetList::CODE_QUALITY,
+ SetList::DEAD_CODE,
+ SetList::EARLY_RETURN,
+ SetList::TYPE_DECLARATION,
+
+ // Modern PHP practices
+ SetList::PRIVATIZATION,
+ SetList::NAMING,
+ ])
+ ->withParallel()
+ ->withCache(__DIR__.'/var/cache/rector');
diff --git a/src/Analyzer.php b/src/Analyzer.php
index 4088b7b..6ec941b 100644
--- a/src/Analyzer.php
+++ b/src/Analyzer.php
@@ -11,18 +11,26 @@
class Analyzer
{
- private $lexicon_file = "";
- private $lexicon = "";
+ /**
+ * @var string
+ */
+ public $emoji_lexicon;
+ /**
+ * @var mixed[]
+ */
+ public $emojis;
+ private readonly string $lexicon_file;
+ private array $lexicon;
- private $current_sentitext = null;
+ private ?\Sentiment\Procedures\SentiText $sentiText = null;
- public function __construct($lexicon_file = "Lexicons/vader_sentiment_lexicon.txt",$emoji_lexicon='Lexicons/emoji_utf8_lexicon.txt')
+ public function __construct(string $lexicon_file = 'Lexicons/vader_sentiment_lexicon.txt', string $emoji_lexicon = 'Lexicons/emoji_utf8_lexicon.txt')
{
//Not sure about this as it forces lexicon file to be in the same directory as executing script
- $this->lexicon_file = __DIR__ . DIRECTORY_SEPARATOR . $lexicon_file;
+ $this->lexicon_file = __DIR__.DIRECTORY_SEPARATOR.$lexicon_file;
$this->lexicon = $this->make_lex_dict();
- $this->emoji_lexicon = __DIR__ . DIRECTORY_SEPARATOR .$emoji_lexicon;
+ $this->emoji_lexicon = __DIR__.DIRECTORY_SEPARATOR.$emoji_lexicon;
$this->emojis = $this->make_emoji_dict();
}
@@ -30,35 +38,32 @@ public function __construct($lexicon_file = "Lexicons/vader_sentiment_lexicon.tx
/*
Determine if input contains negation words
*/
- public function IsNegated($wordToTest, $include_nt = true)
+ public function IsNegated($wordToTest, $include_nt = true): bool
{
- $wordToTest = strtolower($wordToTest);
+ // Compare case-insensitively; every entry in Config::NEGATE is lowercase.
+ $wordToTest = strtolower((string) $wordToTest);
if (in_array($wordToTest, Config::NEGATE)) {
return true;
}
- if ($include_nt) {
- if (strpos($wordToTest, "n't")) {
- return true;
- }
- }
-
- return false;
+ // str_contains() also matches "n't" at offset 0; a truthy strpos() check
+ // returns 0 there, which would be treated as "not found".
+ return $include_nt && str_contains($wordToTest, "n't");
}
/*
Convert lexicon file to a dictionary
*/
- public function make_lex_dict()
+ /**
+ * @return string[]
+ */
+ public function make_lex_dict(): array
{
$lex_dict = [];
- $fp = fopen($this->lexicon_file, "r");
+ $fp = fopen($this->lexicon_file, 'r');
if (!$fp) {
- die("Cannot load lexicon file");
+ die('Cannot load lexicon file');
}
while (($line = fgets($fp, 4096)) !== false) {
- list($word, $measure) = explode("\t", trim($line));
+ [$word, $measure] = explode("\t", trim($line));
//.strip().split('\t')[0:2]
$lex_dict[$word] = $measure;
//lex_dict[word] = float(measure)
@@ -67,95 +72,97 @@ public function make_lex_dict()
return $lex_dict;
}
-
- public function make_emoji_dict() {
+ /**
+ * Load the emoji lexicon file into an emoji => description map.
+ *
+ * Each line is split on a tab into the emoji and its description.
+ * The script is terminated with die() when the file cannot be opened.
+ *
+ * @return array<string, string>
+ */
+ public function make_emoji_dict(): array
+ {
$emoji_dict = [];
- $fp = fopen($this->emoji_lexicon, "r");
+ $fp = fopen($this->emoji_lexicon, 'r');
if (!$fp) {
- die("Cannot load emoji lexicon file");
+ die('Cannot load emoji lexicon file');
}
while (($line = fgets($fp, 4096)) !== false) {
- list($emoji, $description) = explode("\t", trim($line));
+ [$emoji, $description] = explode("\t", trim($line));
//.strip().split('\t')[0:2]
$emoji_dict[$emoji] = $description;
//lex_dict[word] = float(measure)
}
+ // Release the file handle once the whole lexicon has been read.
+ fclose($fp);
+
return $emoji_dict;
}
- public function updateLexicon($arr)
+ public function updateLexicon($arr): ?array
{
- if(!is_array($arr)) return [];
- $lexicon = [];
+ if (!is_array($arr)) {
+ return [];
+ }
foreach ($arr as $word => $valence) {
- $this->lexicon[strtolower($word)] = is_numeric($valence)? $valence : 0;
+ $this->lexicon[strtolower((string) $word)] = is_numeric($valence) ? $valence : 0;
}
- }
- private function IsKindOf($firstWord, $secondWord)
- {
- return "kind" === strtolower($firstWord) && "of" === strtolower($secondWord);
+ return null;
}
- private function IsBoosterWord($word)
+ private function IsBoosterWord($word): bool
{
- return array_key_exists(strtolower($word), Config::BOOSTER_DICT);
+ return array_key_exists(strtolower((string) $word), Config::BOOSTER_DICT);
}
- private function getBoosterScaler($word)
+ private function getBoosterScaler($word): float
{
- return Config::BOOSTER_DICT[strtolower($word)];
+ return Config::BOOSTER_DICT[strtolower((string) $word)];
}
- private function IsInLexicon($word)
+ private function IsInLexicon($word): bool
{
- $lowercase = strtolower($word);
+ $lowercase = strtolower((string) $word);
return array_key_exists($lowercase, $this->lexicon);
}
- private function IsUpperCaseWord($word)
+ private function IsUpperCaseWord($word): bool
{
- return ctype_upper($word);
+ return ctype_upper((string) $word);
}
private function getValenceFromLexicon($word)
{
- return $this->lexicon[strtolower($word)];
+ return $this->lexicon[strtolower((string) $word)];
}
- private function getTargetWordFromContext($wordInContext)
+ private function getTargetWordFromContext(array $wordInContext)
{
- return $wordInContext[count($wordInContext)-1];
+ return $wordInContext[count($wordInContext) - 1];
}
/*
Gets the precedding two words to check for emphasis
*/
- private function getWordInContext($wordList, $currentWordPosition)
+ private function getWordInContext($wordList, int $currentWordPosition): array
{
- $precedingWordList =[];
+ $precedingWordList = [];
//push the actual word on to the context list
array_unshift($precedingWordList, $wordList[$currentWordPosition]);
//If the word position is greater than 2 then we know we are not going to overflow
- if (($currentWordPosition-1)>=0) {
- array_unshift($precedingWordList, $wordList[$currentWordPosition-1]);
+ if (($currentWordPosition - 1) >= 0) {
+ array_unshift($precedingWordList, $wordList[$currentWordPosition - 1]);
} else {
- array_unshift($precedingWordList, "");
+ array_unshift($precedingWordList, '');
}
- if (($currentWordPosition-2)>=0) {
- array_unshift($precedingWordList, $wordList[$currentWordPosition-2]);
+ if (($currentWordPosition - 2) >= 0) {
+ array_unshift($precedingWordList, $wordList[$currentWordPosition - 2]);
} else {
- array_unshift($precedingWordList, "");
+ array_unshift($precedingWordList, '');
}
- if (($currentWordPosition-3)>=0) {
- array_unshift($precedingWordList, $wordList[$currentWordPosition-3]);
+ if (($currentWordPosition - 3) >= 0) {
+ array_unshift($precedingWordList, $wordList[$currentWordPosition - 3]);
} else {
- array_unshift($precedingWordList, "");
+ array_unshift($precedingWordList, '');
}
return $precedingWordList;
@@ -166,13 +173,12 @@ private function getWordInContext($wordList, $currentWordPosition)
Positive values are positive valence, negative value are negative
valence.
*/
- public function getSentiment($text)
+ public function getSentiment($text): array
{
-
$text_no_emoji = '';
$prev_space = true;
- foreach($this->str_split_unicode($text) as $unichr ) {
+ foreach ($this->str_split_unicode($text) as $unichr) {
if (array_key_exists($unichr, $this->emojis)) {
$description = $this->emojis[$unichr];
if (!($prev_space)) {
@@ -180,41 +186,36 @@ public function getSentiment($text)
}
$text_no_emoji .= $description;
$prev_space = false;
- }
- else {
+ } else {
$text_no_emoji .= $unichr;
$prev_space = ($unichr == ' ');
}
}
$text = trim($text_no_emoji);
- $this->current_sentitext = new SentiText($text);
+ $this->sentiText = new SentiText($text);
$sentiments = [];
- $words_and_emoticons = $this->current_sentitext->words_and_emoticons;
+ $words_and_emoticons = $this->sentiText->words_and_emoticons;
- for ($i=0; $i<=count($words_and_emoticons)-1; $i++) {
+ for ($i = 0; $i <= count($words_and_emoticons) - 1; $i++) {
$valence = 0.0;
$wordBeingTested = $words_and_emoticons[$i];
//If this is a booster word add a 0 valances then go to next word as it does not express sentiment directly
- /* if ($this->IsBoosterWord($wordBeingTested)){
- echo "\t\tThe word is a booster word: setting sentiment to 0.0\n";
- }*/
- //var_dump($i);
+ /* if ($this->IsBoosterWord($wordBeingTested)){
+ echo "\t\tThe word is a booster word: setting sentiment to 0.0\n";
+ }*/
+ //var_dump($i);
//If the word is not in the Lexicon then it does not express sentiment. So just ignore it.
- if ($this->IsInLexicon($wordBeingTested)) {
-
- //Special case because kind is in the lexicon so the modifier kind of needs to be skipped
- if ("kind" !=$words_and_emoticons[$i] && "of" != $words_and_emoticons[$i]) {
- $valence = $this->getValenceFromLexicon($wordBeingTested);
-
- $wordInContext = $this->getWordInContext($words_and_emoticons, $i);
- //If we are here then we have a word that enhance booster words
- $valence = $this->adjustBoosterSentiment($wordInContext, $valence);
- }
+ //Special case because kind is in the lexicon so the modifier kind of needs to be skipped
+ if ($this->IsInLexicon($wordBeingTested) && ('kind' != $words_and_emoticons[$i] && 'of' != $words_and_emoticons[$i])) {
+ $valence = $this->getValenceFromLexicon($wordBeingTested);
+ $wordInContext = $this->getWordInContext($words_and_emoticons, $i);
+ //If we are here then we have a word that enhance booster words
+ $valence = $this->adjustBoosterSentiment($wordInContext, $valence);
}
- array_push($sentiments, $valence);
+ $sentiments[] = $valence;
}
//Once we have a sentiment for each word adjust the sentimest if but is present
$sentiments = $this->_but_check($words_and_emoticons, $sentiments);
@@ -222,23 +223,24 @@ public function getSentiment($text)
return $this->score_valence($sentiments, $text);
}
-
- private function str_split_unicode($str, $l = 0) {
+ private function str_split_unicode($str, $l = 0)
+ {
if ($l > 0) {
- $ret = array();
- $len = mb_strlen($str, "UTF-8");
+ $ret = [];
+ $len = mb_strlen((string) $str, 'UTF-8');
for ($i = 0; $i < $len; $i += $l) {
- $ret[] = mb_substr($str, $i, $l, "UTF-8");
+ $ret[] = mb_substr((string) $str, $i, $l, 'UTF-8');
}
+
return $ret;
}
- return preg_split("//u", $str, -1, PREG_SPLIT_NO_EMPTY);
- }
+ return preg_split('//u', (string) $str, -1, PREG_SPLIT_NO_EMPTY);
+ }
private function applyValenceCapsBoost($targetWord, $valence)
{
- if ($this->IsUpperCaseWord($targetWord) && $this->current_sentitext->is_cap_diff) {
+ if ($this->IsUpperCaseWord($targetWord) && $this->sentiText->is_cap_diff) {
if ($valence > 0) {
$valence += Config::C_INCR;
} else {
@@ -274,24 +276,24 @@ private function boosterScaleAdjustment($word, $valence)
// dampen the scalar modifier of preceding words and emoticons
// (excluding the ones that immediately preceed the item) based
// on their distance from the current item.
- private function dampendBoosterScalerByPosition($booster, $position)
+ private function dampendBoosterScalerByPosition($booster, int $position)
{
- if (0===$booster) {
+ if (0 === $booster) {
return $booster;
}
- if (1==$position) {
- return $booster*0.95;
+ if (1 == $position) {
+ return $booster * 0.95;
}
- if (2==$position) {
- return $booster*0.9;
+ if (2 == $position) {
+ return $booster * 0.9;
}
return $booster;
}
- private function adjustBoosterSentiment($wordInContext, $valence)
+ private function adjustBoosterSentiment(array $wordInContext, $valence)
{
//The target word is always the last word
$targetWord = $this->getTargetWordFromContext($wordInContext);
@@ -299,21 +301,19 @@ private function adjustBoosterSentiment($wordInContext, $valence)
//check if sentiment laden word is in ALL CAPS (while others aren't) and apply booster
$valence = $this->applyValenceCapsBoost($targetWord, $valence);
- $valence = $this->modifyValenceBasedOnContext($wordInContext, $valence);
-
- return $valence;
+ return $this->modifyValenceBasedOnContext($wordInContext, $valence);
}
- private function modifyValenceBasedOnContext($wordInContext, $valence)
+ private function modifyValenceBasedOnContext(array $wordInContext, $valence)
{
- $wordToTest = $this->getTargetWordFromContext($wordInContext);
- //if($this->IsInLexicon($wordToTest)){
- // continue;
- //}
- for ($i=0; $igetTargetWordFromContext($wordInContext);
+ //if($this->IsInLexicon($wordToTest)){
+ // continue;
+ //}
+ for ($i = 0; $i < count($wordInContext) - 1; $i++) {
$scalarValue = $this->boosterScaleAdjustment($wordInContext[$i], $valence);
$scalarValue = $this->dampendBoosterScalerByPosition($scalarValue, $i);
- $valence = $valence+$scalarValue;
+ $valence += $scalarValue;
}
$valence = $this->_never_check($wordInContext, $valence);
@@ -336,11 +336,9 @@ public function _least_check($wordInContext, $valence)
{
// check for negation case using "least"
//if the previous word is least"
- if (strtolower($wordInContext[2]) == "least") {
- //but not "at least {word}" "very least {word}"
- if (strtolower($wordInContext[1]) != "at" && strtolower($wordInContext[1]) != "very") {
- $valence = $valence*Config::N_SCALAR;
- }
+ //but not "at least {word}" "very least {word}"
+ if (strtolower((string) $wordInContext[2]) === 'least' && (strtolower((string) $wordInContext[1]) !== 'at' && strtolower((string) $wordInContext[1]) !== 'very')) {
+ $valence *= Config::N_SCALAR;
}
return $valence;
@@ -349,16 +347,17 @@ public function _least_check($wordInContext, $valence)
public function _but_check($words_and_emoticons, $sentiments)
{
// check for modification in sentiment due to contrastive conjunction 'but'
- $bi = array_search("but", $words_and_emoticons);
+ $bi = array_search('but', $words_and_emoticons);
if (!$bi) {
- $bi = array_search("BUT", $words_and_emoticons);
+ $bi = array_search('BUT', $words_and_emoticons);
}
if ($bi) {
- for ($si=0; $si$bi) {
- $sentiments[$si] = $sentiments[$si]*1.5;
+ $counter = count($sentiments);
+ for ($si = 0; $si < $counter; $si++) {
+ if ($si < $bi) {
+ $sentiments[$si] *= 0.5;
+ } elseif ($si > $bi) {
+ $sentiments[$si] *= 1.5;
}
}
}
@@ -368,48 +367,44 @@ public function _but_check($words_and_emoticons, $sentiments)
public function _idioms_check($wordInContext, $valence)
{
- $onezero = sprintf("%s %s", $wordInContext[2], $wordInContext[3]);
-
- $twoonezero = sprintf("%s %s %s", $wordInContext[1], $wordInContext[2], $wordInContext[3]);
-
- $twoone = sprintf("%s %s", $wordInContext[1], $wordInContext[2]);
+ $onezero = sprintf('%s %s', $wordInContext[2], $wordInContext[3]);
- $threetwoone = sprintf("%s %s %s", $wordInContext[0], $wordInContext[1], $wordInContext[2]);
+ $twoonezero = sprintf('%s %s %s', $wordInContext[1], $wordInContext[2], $wordInContext[3]);
- $threetwo = sprintf("%s %s", $wordInContext[0], $wordInContext[1]);
+ $twoone = sprintf('%s %s', $wordInContext[1], $wordInContext[2]);
- $zeroone = sprintf("%s %s", $wordInContext[3], $wordInContext[2]);
+ $threetwoone = sprintf('%s %s %s', $wordInContext[0], $wordInContext[1], $wordInContext[2]);
- $zeroonetwo = sprintf("%s %s %s", $wordInContext[3], $wordInContext[2], $wordInContext[1]);
+ $threetwo = sprintf('%s %s', $wordInContext[0], $wordInContext[1]);
$sequences = [$onezero, $twoonezero, $twoone, $threetwoone, $threetwo];
- foreach ($sequences as $seq) {
- $key = strtolower($seq);
+ foreach ($sequences as $sequence) {
+ $key = strtolower($sequence);
if (array_key_exists($key, Config::SPECIAL_CASE_IDIOMS)) {
$valence = Config::SPECIAL_CASE_IDIOMS[$key];
break;
}
-/*
- Positive idioms check. Not implementing it yet
- if(count($words_and_emoticons)-1 > $i){
- $zeroone = sprintf("%s %s",$words_and_emoticons[$i], $words_and_emoticons[$i+1]);
- if (in_array($zeroone, Config::SPECIAL_CASE_IDIOMS)){
- $valence = Config::SPECIAL_CASE_IDIOMS[$zeroone];
- }
- }
- if(count($words_and_emoticons)-1 > $i+1){
- $zeroonetwo = sprintf("%s %s %s",$words_and_emoticons[$i], $words_and_emoticons[$i+1], $words_and_emoticons[$i+2]);
- if (in_array($zeroonetwo, Config::SPECIAL_CASE_IDIOMS)){
- $valence = Config::SPECIAL_CASE_IDIOMS[$zeroonetwo];
- }
- }
-*/
+ /*
+ Positive idioms check. Not implementing it yet
+ if(count($words_and_emoticons)-1 > $i){
+ $zeroone = sprintf("%s %s",$words_and_emoticons[$i], $words_and_emoticons[$i+1]);
+ if (in_array($zeroone, Config::SPECIAL_CASE_IDIOMS)){
+ $valence = Config::SPECIAL_CASE_IDIOMS[$zeroone];
+ }
+ }
+ if(count($words_and_emoticons)-1 > $i+1){
+ $zeroonetwo = sprintf("%s %s %s",$words_and_emoticons[$i], $words_and_emoticons[$i+1], $words_and_emoticons[$i+2]);
+ if (in_array($zeroonetwo, Config::SPECIAL_CASE_IDIOMS)){
+ $valence = Config::SPECIAL_CASE_IDIOMS[$zeroonetwo];
+ }
+ }
+ */
// check for booster/dampener bi-grams such as 'sort of' or 'kind of'
if ($this->IsBoosterWord($threetwo) || $this->IsBoosterWord($twoone)) {
- $valence = $valence+Config::B_DECR;
+ $valence += Config::B_DECR;
}
}
@@ -420,12 +415,12 @@ public function _never_check($wordInContext, $valance)
{
//If the sentiment word is preceded by never so/this we apply a modifier
$neverModifier = 0;
- if ("never" == $wordInContext[0]) {
+ if ('never' == $wordInContext[0]) {
$neverModifier = 1.25;
- } else if ("never" == $wordInContext[1]) {
+ } elseif ('never' == $wordInContext[1]) {
$neverModifier = 1.5;
}
- if ("so" == $wordInContext[1] || "so"== $wordInContext[2] || "this" == $wordInContext[1] || "this" == $wordInContext[2]) {
+ if ('so' == $wordInContext[1] || 'so' == $wordInContext[2] || 'this' == $wordInContext[1] || 'this' == $wordInContext[2]) {
$valance *= $neverModifier;
}
@@ -439,58 +434,58 @@ public function _never_check($wordInContext, $valance)
return $valance;
}
- public function _sentiment_laden_idioms_check($valence, $senti_text_lower){
- # Future Work
- # check for sentiment laden idioms that don't contain a lexicon word
+ public function _sentiment_laden_idioms_check($valence, $senti_text_lower): float|int
+ {
+ // Future Work
+ // check for sentiment laden idioms that don't contain a lexicon word
+ // Returns the mean valence of the matched idioms, or the incoming $valence when none match.
$idioms_valences = [];
- foreach (Config::SENTIMENT_LADEN_IDIOMS as $idiom) {
- if(in_array($idiom, $senti_text_lower)){
+ // Use a dedicated loop variable: reusing $valence here would overwrite the
+ // parameter that is returned as the fallback below.
+ foreach (Config::SENTIMENT_LADEN_IDIOMS as $idiom => $idiom_valence) {
+ if (in_array($idiom, $senti_text_lower)) {
//print($idiom, $senti_text_lower)
- $valence = Config::SENTIMENT_LADEN_IDIOMS[$idiom];
- $idioms_valences[] = $valence;
+ $idioms_valences[] = $idiom_valence;
}
}
- if ((strlen($idioms_valences) > 0)) {
- $valence = ( array_sum( explode( ',', $idioms_valences ) ) / floatval(strlen($idioms_valences)));
+ if (count($idioms_valences) > 0) {
+ return array_sum($idioms_valences) / floatval(count($idioms_valences));
}
+
return $valence;
}
- public function _punctuation_emphasis($sum_s, $text)
+ public function _punctuation_emphasis($sum_s, $text): float|int
{
// add emphasis from exclamation points and question marks
$ep_amplifier = $this->_amplify_ep($text);
$qm_amplifier = $this->_amplify_qm($text);
- $punct_emph_amplifier = $ep_amplifier+$qm_amplifier;
- return $punct_emph_amplifier;
+ return $ep_amplifier + $qm_amplifier;
}
- public function _amplify_ep($text)
+ public function _amplify_ep($text): float
{
// check for added emphasis resulting from exclamation points (up to 4 of them)
- $ep_count = substr_count($text, "!");
+ $ep_count = substr_count((string) $text, '!');
if ($ep_count > 4) {
$ep_count = 4;
}
- # (empirically derived mean sentiment intensity rating increase for
- # exclamation points)
- $ep_amplifier = $ep_count*0.292;
+ // (empirically derived mean sentiment intensity rating increase for
+ // exclamation points)
+ $ep_amplifier = $ep_count * 0.292;
return $ep_amplifier;
}
- public function _amplify_qm($text)
+ public function _amplify_qm($text): float|int
{
- # check for added emphasis resulting from question marks (2 or 3+)
- $qm_count = substr_count($text, "?");
+ // check for added emphasis resulting from question marks (2 or 3+)
+ $qm_count = substr_count((string) $text, '?');
$qm_amplifier = 0;
if ($qm_count > 1) {
if ($qm_count <= 3) {
- # (empirically derived mean sentiment intensity rating increase for
- # question marks)
- $qm_amplifier = $qm_count*0.18;
+ // (empirically derived mean sentiment intensity rating increase for
+ // question marks)
+ $qm_amplifier = $qm_count * 0.18;
} else {
$qm_amplifier = 0.96;
}
@@ -499,20 +494,20 @@ public function _amplify_qm($text)
return $qm_amplifier;
}
- public function _sift_sentiment_scores($sentiments)
+ public function _sift_sentiment_scores($sentiments): array
{
- # want separate positive versus negative sentiment scores
+ // want separate positive versus negative sentiment scores
$pos_sum = 0.0;
$neg_sum = 0.0;
$neu_count = 0;
- foreach ($sentiments as $sentiment_score) {
- if ($sentiment_score > 0) {
- $pos_sum += $sentiment_score +1; # compensates for neutral words that are counted as 1
+ foreach ($sentiments as $sentiment) {
+ if ($sentiment > 0) {
+ $pos_sum += $sentiment + 1; // compensates for neutral words that are counted as 1
}
- if ($sentiment_score < 0) {
- $neg_sum += $sentiment_score -1; # when used with math.fabs(), compensates for neutrals
+ if ($sentiment < 0) {
+ $neg_sum += $sentiment - 1; // when used with math.fabs(), compensates for neutrals
}
- if ($sentiment_score == 0) {
+ if ($sentiment == 0) {
$neu_count += 1;
}
}
@@ -520,11 +515,11 @@ public function _sift_sentiment_scores($sentiments)
return [$pos_sum, $neg_sum, $neu_count];
}
- public function score_valence($sentiments, $text)
+ public function score_valence($sentiments, $text): array
{
if ($sentiments) {
$sum_s = array_sum($sentiments);
- # compute and add emphasis from punctuation in text
+ // compute and add emphasis from punctuation in text
$punct_emph_amplifier = $this->_punctuation_emphasis($sum_s, $text);
if ($sum_s > 0) {
$sum_s += $punct_emph_amplifier;
@@ -533,8 +528,8 @@ public function score_valence($sentiments, $text)
}
$compound = Config::normalize($sum_s);
- # discriminate between positive, negative and neutral sentiment scores
- list($pos_sum, $neg_sum, $neu_count) = $this->_sift_sentiment_scores($sentiments);
+ // discriminate between positive, negative and neutral sentiment scores
+ [$pos_sum, $neg_sum, $neu_count] = $this->_sift_sentiment_scores($sentiments);
if ($pos_sum > abs($neg_sum)) {
$pos_sum += $punct_emph_amplifier;
@@ -543,7 +538,7 @@ public function score_valence($sentiments, $text)
}
$total = $pos_sum + abs($neg_sum) + $neu_count;
- $pos =abs($pos_sum / $total);
+ $pos = abs($pos_sum / $total);
$neg = abs($neg_sum / $total);
$neu = abs($neu_count / $total);
} else {
@@ -553,12 +548,9 @@ public function score_valence($sentiments, $text)
$neu = 0.0;
}
- $sentiment_dict =
- ["neg" => round($neg, 3),
- "neu" => round($neu, 3),
- "pos" => round($pos, 3),
- "compound" => round($compound, 4)];
-
- return $sentiment_dict;
+ return ['neg' => round($neg, 3),
+ 'neu' => round($neu, 3),
+ 'pos' => round($pos, 3),
+ 'compound' => round($compound, 4)];
}
}
diff --git a/src/Config/Config.php b/src/Config/Config.php
index 05f0304..afe529a 100644
--- a/src/Config/Config.php
+++ b/src/Config/Config.php
@@ -7,69 +7,65 @@
*/
class Config
{
-
-
// (empirically derived mean sentiment intensity rating increase for booster words)
- const B_INCR = 0.293;
- const B_DECR = -0.293;
+ public const B_INCR = 0.293;
+ public const B_DECR = -0.293;
// (empirically derived mean sentiment intensity rating increase for using
// ALLCAPs to emphasize a word)
- const C_INCR = 0.733;
+ public const C_INCR = 0.733;
- const N_SCALAR = -0.74;
+ public const N_SCALAR = -0.74;
// for removing punctuation
//const REGEX_REMOVE_PUNCTUATION = re.compile('[%s]' % re.escape(string.punctuation))
-
- const NEGATE = ["aint", "arent", "cannot", "cant", "couldnt", "darent", "didnt", "doesnt",
+
+ public const NEGATE = ['aint', 'arent', 'cannot', 'cant', 'couldnt', 'darent', 'didnt', 'doesnt',
"ain't", "aren't", "can't", "couldn't", "daren't", "didn't", "doesn't",
- "dont", "hadnt", "hasnt", "havent", "isnt", "mightnt", "mustnt", "neither",
+ 'dont', 'hadnt', 'hasnt', 'havent', 'isnt', 'mightnt', 'mustnt', 'neither',
"don't", "hadn't", "hasn't", "haven't", "isn't", "mightn't", "mustn't",
- "neednt", "needn't", "never", "none", "nope", "nor", "not", "nothing", "nowhere",
- "oughtnt", "shant", "shouldnt", "uhuh", "wasnt", "werent",
- "oughtn't", "shan't", "shouldn't", "uh-uh", "wasn't", "weren't",
- "without", "wont", "wouldnt", "won't", "wouldn't", "rarely", "seldom", "despite"];
+ 'neednt', "needn't", 'never', 'none', 'nope', 'nor', 'not', 'nothing', 'nowhere',
+ 'oughtnt', 'shant', 'shouldnt', 'uhuh', 'wasnt', 'werent',
+ "oughtn't", "shan't", "shouldn't", 'uh-uh', "wasn't", "weren't",
+ 'without', 'wont', 'wouldnt', "won't", "wouldn't", 'rarely', 'seldom', 'despite'];
//booster/dampener 'intensifiers' or 'degree adverbs'
//http://en.wiktionary.org/wiki/Category:English_degree_adverbs
- const BOOSTER_DICT = ["absolutely"=> self::B_INCR, "amazingly"=> self::B_INCR, "awfully"=> self::B_INCR, "completely"=> self::B_INCR, "considerably"=> self::B_INCR,
- "decidedly"=> self::B_INCR, "deeply"=> self::B_INCR, "effing"=> self::B_INCR,"enormous"=> self::B_INCR, "enormously"=> self::B_INCR,
- "entirely"=> self::B_INCR, "especially"=> self::B_INCR, "exceptionally"=> self::B_INCR, "extremely"=> self::B_INCR,
- "fabulously"=> self::B_INCR, "flipping"=> self::B_INCR, "flippin"=> self::B_INCR,
- "fricking"=> self::B_INCR, "frickin"=> self::B_INCR, "frigging"=> self::B_INCR, "friggin"=> self::B_INCR, "fully"=> self::B_INCR, "fucking"=> self::B_INCR,
- "greatly"=> self::B_INCR, "hella"=> self::B_INCR, "highly"=> self::B_INCR, "hugely"=> self::B_INCR, "incredibly"=> self::B_INCR,
- "intensely"=> self::B_INCR, "majorly"=> self::B_INCR, "more"=> self::B_INCR, "most"=> self::B_INCR, "particularly"=> self::B_INCR,
- "purely"=> self::B_INCR, "quite"=> self::B_INCR, "seemingly" => self::B_INCR, "really"=> self::B_INCR, "remarkably"=> self::B_INCR,
- "so"=> self::B_INCR, "substantially"=> self::B_INCR,
- "thoroughly"=> self::B_INCR, "totally"=> self::B_INCR, "tremendous"=> self::B_INCR, "tremendously"=> self::B_INCR,
- "uber"=> self::B_INCR, "unbelievably"=> self::B_INCR, "unusually"=> self::B_INCR, "utterly"=> self::B_INCR,
- "very"=> self::B_INCR,
- "almost"=> self::B_DECR, "barely"=> self::B_DECR, "hardly"=> self::B_DECR, "just enough"=> self::B_DECR,
- "kind of"=> self::B_DECR, "kinda"=> self::B_DECR, "kindof"=> self::B_DECR, "kind-of"=> self::B_DECR,
- "less"=> self::B_DECR, "little"=> self::B_DECR, "marginally"=> self::B_DECR, "occasional"=> self::B_DECR, "occasionally"=> self::B_DECR, "partly"=> self::B_DECR,
- "scarcely"=> self::B_DECR, "slightly"=> self::B_DECR, "somewhat"=> self::B_DECR,
- "sort of"=> self::B_DECR, "sorta"=> self::B_DECR, "sortof"=> self::B_DECR, "sort-of"=> self::B_DECR];
-
+ public const BOOSTER_DICT = ['absolutely' => self::B_INCR, 'amazingly' => self::B_INCR, 'awfully' => self::B_INCR, 'completely' => self::B_INCR, 'considerably' => self::B_INCR,
+ 'decidedly' => self::B_INCR, 'deeply' => self::B_INCR, 'effing' => self::B_INCR, 'enormous' => self::B_INCR, 'enormously' => self::B_INCR,
+ 'entirely' => self::B_INCR, 'especially' => self::B_INCR, 'exceptionally' => self::B_INCR, 'extremely' => self::B_INCR,
+ 'fabulously' => self::B_INCR, 'flipping' => self::B_INCR, 'flippin' => self::B_INCR,
+ 'fricking' => self::B_INCR, 'frickin' => self::B_INCR, 'frigging' => self::B_INCR, 'friggin' => self::B_INCR, 'fully' => self::B_INCR, 'fucking' => self::B_INCR,
+ 'greatly' => self::B_INCR, 'hella' => self::B_INCR, 'highly' => self::B_INCR, 'hugely' => self::B_INCR, 'incredibly' => self::B_INCR,
+ 'intensely' => self::B_INCR, 'majorly' => self::B_INCR, 'more' => self::B_INCR, 'most' => self::B_INCR, 'particularly' => self::B_INCR,
+ 'purely' => self::B_INCR, 'quite' => self::B_INCR, 'seemingly' => self::B_INCR, 'really' => self::B_INCR, 'remarkably' => self::B_INCR,
+ 'so' => self::B_INCR, 'substantially' => self::B_INCR,
+ 'thoroughly' => self::B_INCR, 'totally' => self::B_INCR, 'tremendous' => self::B_INCR, 'tremendously' => self::B_INCR,
+ 'uber' => self::B_INCR, 'unbelievably' => self::B_INCR, 'unusually' => self::B_INCR, 'utterly' => self::B_INCR,
+ 'very' => self::B_INCR,
+ 'almost' => self::B_DECR, 'barely' => self::B_DECR, 'hardly' => self::B_DECR, 'just enough' => self::B_DECR,
+ 'kind of' => self::B_DECR, 'kinda' => self::B_DECR, 'kindof' => self::B_DECR, 'kind-of' => self::B_DECR,
+ 'less' => self::B_DECR, 'little' => self::B_DECR, 'marginally' => self::B_DECR, 'occasional' => self::B_DECR, 'occasionally' => self::B_DECR, 'partly' => self::B_DECR,
+ 'scarcely' => self::B_DECR, 'slightly' => self::B_DECR, 'somewhat' => self::B_DECR,
+ 'sort of' => self::B_DECR, 'sorta' => self::B_DECR, 'sortof' => self::B_DECR, 'sort-of' => self::B_DECR];
- # check for sentiment laden idioms that do not contain lexicon words (future work, not yet implemented)
- const SENTIMENT_LADEN_IDIOMS = ["cut the mustard"=> 2, "hand to mouth"=> -2,
- "back handed"=> -2, "blow smoke"=> -2, "blowing smoke"=> -2,
- "upper hand"=> 1, "break a leg"=> 2,
- "cooking with gas"=> 2, "in the black"=> 2, "in the red"=> -2,
- "on the ball"=> 2, "under the weather"=> -2];
+ // check for sentiment laden idioms that do not contain lexicon words (future work, not yet implemented)
+ public const SENTIMENT_LADEN_IDIOMS = ['cut the mustard' => 2, 'hand to mouth' => -2,
+ 'back handed' => -2, 'blow smoke' => -2, 'blowing smoke' => -2,
+ 'upper hand' => 1, 'break a leg' => 2,
+ 'cooking with gas' => 2, 'in the black' => 2, 'in the red' => -2,
+ 'on the ball' => 2, 'under the weather' => -2];
// check for special case idioms using a sentiment-laden keyword known to SAGE
- const SPECIAL_CASE_IDIOMS = ["the shit"=> 3, "the bomb"=> 3, "bad ass"=> 1.5, "bus stop"=> 0.0, "yeah right"=> -2, "cut the mustard"=> 2, "kiss of death"=> -1.5, "hand to mouth"=> -2, "beating heart"=> 3.1,"broken heart"=> -2.9, "to die for"=> 3];
- ##Static methods##
+ public const SPECIAL_CASE_IDIOMS = ['the shit' => 3, 'the bomb' => 3, 'bad ass' => 1.5, 'bus stop' => 0.0, 'yeah right' => -2, 'cut the mustard' => 2, 'kiss of death' => -1.5, 'hand to mouth' => -2, 'beating heart' => 3.1, 'broken heart' => -2.9, 'to die for' => 3];
+ // Static methods
/*
Normalize the score to be between -1 and 1 using an alpha that
approximates the max expected value
*/
- public static function normalize($score, $alpha = 15)
+ public static function normalize($score, $alpha = 15): float
{
- $norm_score = $score/sqrt(($score*$score) + $alpha);
- return $norm_score;
+ return $score / sqrt(($score * $score) + $alpha);
}
}
diff --git a/src/Procedures/SentiText.php b/src/Procedures/SentiText.php
index 7a0773f..71bec6d 100644
--- a/src/Procedures/SentiText.php
+++ b/src/Procedures/SentiText.php
@@ -8,22 +8,14 @@
class SentiText
{
+ public $words_and_emoticons;
+ public $is_cap_diff;
- private $text = "";
- public $words_and_emoticons = null;
- public $is_cap_diff = null;
+ public const PUNC_LIST = ['.', '!', '?', ',', ';', ':', '-', "'", '"',
+ '!!', '!!!', '??', '???', '?!?', '!?!', '?!?!', '!?!?'];
- const PUNC_LIST = [".", "!", "?", ",", ";", ":", "-", "'", "\"",
- "!!", "!!!", "??", "???", "?!?", "!?!", "?!?!", "!?!?"];
-
-
- function __construct($text)
+ public function __construct(private $text)
{
- //checking that is string
- //if (!isinstance(text, str)){
- // text = str(text.encode('utf-8'));
- //}
- $this->text = $text;
$this->words_and_emoticons = $this->_words_and_emoticons();
// doesn't separate words from\
// adjacent punctuation (keeps emoticons & contractions)
@@ -33,18 +25,19 @@ function __construct($text)
/*
Remove all punctation from a string
*/
- function strip_punctuation($string)
+ public function strip_punctuation($string): string|array|null
{
//$string = strtolower($string);
- return preg_replace("/[[:punct:]]+/", "", $string);
+ return preg_replace('/[[:punct:]]+/', '', (string) $string);
}
- function array_count_values_of($haystack, $needle)
+ public function array_count_values_of($haystack, $needle): int
{
if (!in_array($needle, $haystack, true)) {
return 0;
}
$counts = array_count_values($haystack);
+
return $counts[$needle];
}
@@ -56,67 +49,62 @@ function array_count_values_of($haystack, $needle)
*/
private function allcap_differential($words)
{
-
$is_different = false;
$allcap_words = 0;
foreach ($words as $word) {
//ctype is affected by the local of the processor see manual for more details
- if (ctype_upper($word)) {
+ if (ctype_upper((string) $word)) {
$allcap_words += 1;
}
}
$cap_differential = count($words) - $allcap_words;
if ($cap_differential > 0 && $cap_differential < count($words)) {
- $is_different = true;
+ return true;
}
+
return $is_different;
}
- function _words_only()
+ public function _words_only()
{
$text_mod = $this->strip_punctuation($this->text);
// removes punctuation (but loses emoticons & contractions)
- $words_only = preg_split('/\s+/', $text_mod);
- # get rid of empty items or single letter "words" like 'a' and 'I'
- $works_only = array_filter($words_only, function ($word) {
- return strlen($word) > 1;
- });
+ $words_only = preg_split('/\s+/', (string) $text_mod);
+ // keep only words longer than one character (drops '', 'a', 'I'); a stray "D" left from ":D" would otherwise make _words_and_emoticons() rewrite the emoticon to "D"
+ $words_only = array_values(array_filter($words_only, fn ($word): bool => strlen($word) > 1));
+
return $words_only;
}
- function _words_and_emoticons()
+ public function _words_and_emoticons()
{
+ $wes = preg_split('/\s+/', (string) $this->text);
- $wes = preg_split('/\s+/', $this->text);
-
- # get rid of residual empty items or single letter words
- $wes = array_filter($wes, function ($word) {
- return strlen($word) > 1;
- });
+ // get rid of residual empty items or single letter words
+ $wes = array_filter($wes, fn ($word): bool => strlen($word) > 1);
//Need to remap the indexes of the array
$wes = array_values($wes);
$words_only = $this->_words_only();
- foreach ($words_only as $word) {
+ foreach ($words_only as $word_only) {
foreach (self::PUNC_LIST as $punct) {
//replace all punct + word combinations with word
- $pword = $punct .$word;
-
+ $pword = $punct.$word_only;
$x1 = $this->array_count_values_of($wes, $pword);
while ($x1 > 0) {
$i = array_search($pword, $wes, true);
unset($wes[$i]);
- array_splice($wes, $i, 0, $word);
+ array_splice($wes, $i, 0, $word_only);
$x1 = $this->array_count_values_of($wes, $pword);
}
//Do the same as above but word then punct
- $wordp = $word . $punct;
+ $wordp = $word_only.$punct;
$x2 = $this->array_count_values_of($wes, $wordp);
while ($x2 > 0) {
$i = array_search($wordp, $wes, true);
unset($wes[$i]);
- array_splice($wes, $i, 0, $word);
+ array_splice($wes, $i, 0, $word_only);
$x2 = $this->array_count_values_of($wes, $wordp);
}
}
diff --git a/tests/Pest.php b/tests/Pest.php
new file mode 100644
index 0000000..89287f1
--- /dev/null
+++ b/tests/Pest.php
@@ -0,0 +1,45 @@
+extend(Sentiment\Tests\TestCase::class)->in('Feature');
+
+/*
+|--------------------------------------------------------------------------
+| Expectations
+|--------------------------------------------------------------------------
+|
+| When you're writing tests, you often need to check that values meet certain conditions. The
+| "expect()" function gives you access to a set of "expectations" methods that you can use
+| to assert different things. Of course, you may extend the Expectation API at any time.
+|
+*/
+
+expect()->extend('toBeOne', function () {
+ return $this->toBe(1);
+});
+
+/*
+|--------------------------------------------------------------------------
+| Functions
+|--------------------------------------------------------------------------
+|
+| While Pest is very powerful out-of-the-box, you may have some testing code specific to your
+| project that you don't want to repeat in every file. Here you can also expose helpers as
+| global functions to help you to reduce the number of lines of code in your test files.
+|
+*/
+
+function something()
+{
+ // ..
+}
diff --git a/tests/TestCase.php b/tests/TestCase.php
new file mode 100644
index 0000000..5e9e312
--- /dev/null
+++ b/tests/TestCase.php
@@ -0,0 +1,10 @@
+analyzer = new Analyzer();
+});
+
+describe('Analyzer', function () {
+ it('can be instantiated', function () {
+ expect($this->analyzer)->toBeInstanceOf(Analyzer::class);
+ });
+
+ it('analyzes positive sentences correctly', function () {
+ // Create a fresh analyzer instance for this test
+ $analyzer = new Analyzer();
+
+ // Test one simple positive sentence
+ $result = $analyzer->getSentiment('This is great');
+
+ // Ensure all keys are present
+ expect($result)->toHaveKeys(['neg', 'neu', 'pos', 'compound']);
+
+ // Positive score should be greater than negative
+ expect($result['pos'])->toBeGreaterThan($result['neg']);
+
+ // Compound should be positive (> 0 for clearly positive)
+ expect($result['compound'])->toBeGreaterThan(0);
+ });
+
+ it('analyzes negative sentences correctly', function () {
+ $negatives = [
+ 'This is terrible!' => ['neg' => true, 'compound' => true],
+ 'I hate this' => ['neg' => true, 'compound' => true],
+ 'Worst experience ever' => ['neg' => true, 'compound' => true],
+ 'This is awful' => ['neg' => true, 'compound' => true],
+ 'Completely disappointed' => ['neg' => true, 'compound' => true],
+ 'Horrible service' => ['neg' => true, 'compound' => true],
+ ];
+
+ foreach ($negatives as $text => $expectations) {
+ $result = $this->analyzer->getSentiment($text);
+
+ // Negative score should be greater than positive
+ expect($result['neg'])->toBeGreaterThan($result['pos']);
+
+ // Compound should be negative (< -0.05 for clearly negative)
+ expect($result['compound'])->toBeLessThan(-0.05);
+ }
+ });
+
+ it('analyzes neutral sentences correctly', function () {
+ $neutrals = [
+ 'The sky is blue',
+ 'Today is Monday',
+ 'The book is on the table',
+ 'Water is H2O',
+ 'The meeting is at 3pm',
+ ];
+
+ foreach ($neutrals as $text) {
+ $result = $this->analyzer->getSentiment($text);
+
+ // Neutral score should be dominant
+ expect($result['neu'])->toBeGreaterThan(0.5);
+
+ // Compound should be close to 0 (between -0.05 and 0.05)
+ expect($result['compound'])->toBeBetween(-0.05, 0.05);
+ }
+ });
+
+ it('handles emojis in sentiment analysis', function () {
+ $textsWithEmojis = [
+ 'I love this 😍' => ['positive' => true],
+ 'So sad 😢' => ['negative' => true],
+ 'Happy day 😊' => ['positive' => true],
+ 'Angry 😠' => ['negative' => true],
+ ];
+
+ foreach ($textsWithEmojis as $text => $expectation) {
+ $result = $this->analyzer->getSentiment($text);
+
+ if ($expectation['positive'] ?? false) {
+ expect($result['compound'])->toBeGreaterThan(0);
+ }
+ if ($expectation['negative'] ?? false) {
+ expect($result['compound'])->toBeLessThan(0);
+ }
+ }
+ });
+
+ it('handles negation correctly', function () {
+ // Create a fresh analyzer instance
+ $analyzer = new Analyzer();
+
+ // Test basic negation
+ $result = $analyzer->getSentiment('not good');
+
+ // "not good" should be negative
+ expect($result['compound'])->toBeLessThanOrEqual(0);
+ });
+
+ it('handles emphasis with punctuation', function () {
+ // Multiple exclamation marks should amplify sentiment
+ $regular = $this->analyzer->getSentiment('This is good');
+ $emphasized = $this->analyzer->getSentiment('This is good!!!');
+
+ // Emphasized should have stronger positive sentiment
+ expect(abs($emphasized['compound']))->toBeGreaterThan(abs($regular['compound']));
+
+ // Question marks can also affect sentiment
+ $question = $this->analyzer->getSentiment('This is good???');
+ expect($question)->toHaveKeys(['neg', 'neu', 'pos', 'compound']);
+ });
+
+ it('handles all caps for emphasis', function () {
+ $regular = $this->analyzer->getSentiment('this is amazing');
+ $allCaps = $this->analyzer->getSentiment('THIS IS AMAZING');
+
+ // All caps should amplify the sentiment
+ expect(abs($allCaps['compound']))->toBeGreaterThanOrEqual(abs($regular['compound']));
+ });
+
+ it('handles BUT conjunction correctly', function () {
+ // Sentiment after BUT should be weighted more heavily
+ $result = $this->analyzer->getSentiment('The food was great but the service was terrible');
+
+ // Should lean negative because negative part comes after BUT
+ expect($result['compound'])->toBeLessThan(0);
+
+ // Reverse case
+ $result2 = $this->analyzer->getSentiment('The service was terrible but the food was great');
+
+ // Should lean positive because positive part comes after BUT
+ expect($result2['compound'])->toBeGreaterThan(0);
+ });
+
+ it('returns consistent score structure', function () {
+ $result = $this->analyzer->getSentiment('Test sentence');
+
+ // Check all required keys exist
+ expect($result)->toHaveKeys(['neg', 'neu', 'pos', 'compound']);
+
+ // Check all values are numeric
+ expect($result['neg'])->toBeNumeric();
+ expect($result['neu'])->toBeNumeric();
+ expect($result['pos'])->toBeNumeric();
+ expect($result['compound'])->toBeNumeric();
+
+ // Check scores are normalized (sum to approximately 1)
+ $sum = $result['neg'] + $result['neu'] + $result['pos'];
+ expect($sum)->toBeBetween(0.999, 1.001);
+
+ // Check compound is between -1 and 1
+ expect($result['compound'])->toBeBetween(-1, 1);
+ });
+
+ it('handles empty and whitespace strings', function () {
+ $emptyResult = $this->analyzer->getSentiment('');
+ expect($emptyResult['compound'])->toBe(0.0);
+ expect($emptyResult['neg'])->toBe(0.0);
+ expect($emptyResult['pos'])->toBe(0.0);
+ expect($emptyResult['neu'])->toBe(0.0);
+
+ $whitespaceResult = $this->analyzer->getSentiment(' ');
+ expect($whitespaceResult['compound'])->toBe(0.0);
+ });
+
+ it('can update lexicon with custom words', function () {
+ // Add custom positive word
+ $this->analyzer->updateLexicon(['awesomesauce' => 3.0]);
+
+ $result = $this->analyzer->getSentiment('This is awesomesauce');
+ expect($result['compound'])->toBeGreaterThan(0);
+
+ // Add custom negative word
+ $this->analyzer->updateLexicon(['terribleawful' => -3.0]);
+
+ $result2 = $this->analyzer->getSentiment('This is terribleawful');
+ expect($result2['compound'])->toBeLessThan(0);
+ });
+
+ it('detects negation with IsNegated method', function () {
+ expect($this->analyzer->IsNegated('not'))->toBeTrue();
+ expect($this->analyzer->IsNegated('never'))->toBeTrue();
+ expect($this->analyzer->IsNegated("isn't"))->toBeTrue();
+ expect($this->analyzer->IsNegated("wouldn't"))->toBeTrue();
+ expect($this->analyzer->IsNegated('happy'))->toBeFalse();
+ // 'no' is not in the NEGATE array, so removed that test
+ });
+});
diff --git a/tests/Unit/ConfigTest.php b/tests/Unit/ConfigTest.php
new file mode 100644
index 0000000..321252a
--- /dev/null
+++ b/tests/Unit/ConfigTest.php
@@ -0,0 +1,138 @@
+toBeArray();
+ expect(Config::BOOSTER_DICT)->toBeArray();
+ expect(Config::SPECIAL_CASE_IDIOMS)->toBeArray();
+ expect(Config::SENTIMENT_LADEN_IDIOMS)->toBeArray();
+ });
+
+ it('has correct incremental values', function () {
+ expect(Config::B_INCR)->toBeNumeric();
+ expect(Config::B_DECR)->toBeNumeric();
+ expect(Config::C_INCR)->toBeNumeric();
+ expect(Config::N_SCALAR)->toBeNumeric();
+
+ // B_INCR should be positive
+ expect(Config::B_INCR)->toBeGreaterThan(0);
+ // B_DECR should be negative
+ expect(Config::B_DECR)->toBeLessThan(0);
+ });
+
+ it('contains expected negation words', function () {
+ $expectedNegations = ['not', 'never', 'neither', 'nowhere', 'nothing', 'none', 'without'];
+
+ foreach ($expectedNegations as $word) {
+ expect(Config::NEGATE)->toContain($word);
+ }
+ });
+
+ it('has booster words with correct values', function () {
+ // Check some known booster words
+ expect(Config::BOOSTER_DICT)->toHaveKey('absolutely');
+ expect(Config::BOOSTER_DICT['absolutely'])->toBeNumeric();
+
+ expect(Config::BOOSTER_DICT)->toHaveKey('very');
+ expect(Config::BOOSTER_DICT['very'])->toBeNumeric();
+
+ expect(Config::BOOSTER_DICT)->toHaveKey('slightly');
+ expect(Config::BOOSTER_DICT['slightly'])->toBeNumeric();
+
+ // Intensifiers should have positive values
+ expect(Config::BOOSTER_DICT['absolutely'])->toBeGreaterThan(0);
+ expect(Config::BOOSTER_DICT['very'])->toBeGreaterThan(0);
+
+ // Diminishers should have negative values
+ expect(Config::BOOSTER_DICT['slightly'])->toBeLessThan(0);
+ });
+
+ it('has special case idioms with sentiment values', function () {
+ // Check some known idioms
+ expect(Config::SPECIAL_CASE_IDIOMS)->toHaveKey('the shit');
+ expect(Config::SPECIAL_CASE_IDIOMS)->toHaveKey('the bomb');
+ expect(Config::SPECIAL_CASE_IDIOMS)->toHaveKey('bad ass');
+
+ // These should have numeric sentiment values
+ foreach (Config::SPECIAL_CASE_IDIOMS as $idiom => $value) {
+ expect($value)->toBeNumeric();
+ }
+ });
+
+ it('has sentiment laden idioms with values', function () {
+ // Check structure
+ expect(Config::SENTIMENT_LADEN_IDIOMS)->toBeArray();
+
+ // Check some known idioms
+ expect(Config::SENTIMENT_LADEN_IDIOMS)->toHaveKey('cut the mustard');
+ expect(Config::SENTIMENT_LADEN_IDIOMS)->toHaveKey('on the ball');
+
+ // All values should be numeric
+ foreach (Config::SENTIMENT_LADEN_IDIOMS as $idiom => $value) {
+ expect($value)->toBeNumeric();
+ }
+ });
+
+ it('normalizes scores correctly', function () {
+ // Test with different scores
+ $testCases = [
+ ['score' => 0, 'expected' => 0.0],
+ ['score' => 5, 'alpha' => 15, 'min' => 0.7, 'max' => 0.8],
+ ['score' => -5, 'alpha' => 15, 'min' => -0.8, 'max' => -0.7],
+ ['score' => 15, 'alpha' => 15, 'min' => 0.96, 'max' => 0.98],
+ ['score' => -15, 'alpha' => 15, 'min' => -0.98, 'max' => -0.96],
+ ];
+
+ foreach ($testCases as $test) {
+ $score = $test['score'];
+ $alpha = $test['alpha'] ?? 15;
+ $result = Config::normalize($score, $alpha);
+
+ // Result should be between -1 and 1
+ expect($result)->toBeBetween(-1, 1);
+
+ // Check expected value or range
+ if (isset($test['expected'])) {
+ expect($result)->toBe($test['expected']);
+ } elseif (isset($test['min']) && isset($test['max'])) {
+ expect($result)->toBeBetween($test['min'], $test['max']);
+ }
+ }
+ });
+
+ it('normalize function handles edge cases', function () {
+ // Very large positive score should approach 1
+ $largePositive = Config::normalize(1000, 15);
+ expect($largePositive)->toBeLessThan(1);
+ expect($largePositive)->toBeGreaterThan(0.95);
+
+ // Very large negative score should approach -1
+ $largeNegative = Config::normalize(-1000, 15);
+ expect($largeNegative)->toBeGreaterThan(-1);
+ expect($largeNegative)->toBeLessThan(-0.95);
+
+ // Zero should return zero
+ expect(Config::normalize(0))->toBe(0.0);
+ });
+
+ it('normalize function with different alpha values', function () {
+ $score = 10;
+
+ // Smaller alpha makes normalization more aggressive
+ $smallAlpha = Config::normalize($score, 5);
+ $normalAlpha = Config::normalize($score, 15);
+ $largeAlpha = Config::normalize($score, 50);
+
+ // With same score, smaller alpha should give larger normalized value
+ expect($smallAlpha)->toBeGreaterThan($normalAlpha);
+ expect($normalAlpha)->toBeGreaterThan($largeAlpha);
+
+ // All should still be between -1 and 1
+ expect($smallAlpha)->toBeBetween(0, 1);
+ expect($normalAlpha)->toBeBetween(0, 1);
+ expect($largeAlpha)->toBeBetween(0, 1);
+ });
+});
diff --git a/tests/Unit/SentiTextTest.php b/tests/Unit/SentiTextTest.php
new file mode 100644
index 0000000..5591b19
--- /dev/null
+++ b/tests/Unit/SentiTextTest.php
@@ -0,0 +1,159 @@
+toBeInstanceOf(SentiText::class);
+ });
+
+ it('extracts words and emoticons correctly', function () {
+ $sentiText = new SentiText('Hello world :) How are you?');
+ expect($sentiText->words_and_emoticons)->toBeArray();
+ expect($sentiText->words_and_emoticons)->toContain('Hello');
+ expect($sentiText->words_and_emoticons)->toContain('world');
+ expect($sentiText->words_and_emoticons)->toContain(':)');
+ });
+
+ it('strips punctuation correctly', function () {
+ $sentiText = new SentiText('test');
+
+ $result = $sentiText->strip_punctuation('Hello, world! How are you?');
+ expect($result)->toBe('Hello world How are you');
+
+ $result = $sentiText->strip_punctuation('Test... with... dots...');
+ expect($result)->toBe('Test with dots');
+
+ $result = $sentiText->strip_punctuation('No punctuation here');
+ expect($result)->toBe('No punctuation here');
+ });
+
+ it('counts array values correctly', function () {
+ $sentiText = new SentiText('test');
+
+ $haystack = ['apple', 'banana', 'apple', 'cherry', 'apple'];
+
+ expect($sentiText->array_count_values_of($haystack, 'apple'))->toBe(3);
+ expect($sentiText->array_count_values_of($haystack, 'banana'))->toBe(1);
+ expect($sentiText->array_count_values_of($haystack, 'cherry'))->toBe(1);
+ expect($sentiText->array_count_values_of($haystack, 'orange'))->toBe(0);
+ });
+
+ it('detects capitalization differential', function () {
+ // Test through the public property instead of private method
+
+ // All lowercase - no differential
+ $sentiText = new SentiText('hello world test');
+ expect($sentiText->is_cap_diff)->toBeFalse();
+
+ // All uppercase - no differential
+ $sentiText = new SentiText('HELLO WORLD TEST');
+ expect($sentiText->is_cap_diff)->toBeFalse();
+
+ // Mixed case - has differential
+ $sentiText = new SentiText('HELLO world TEST');
+ expect($sentiText->is_cap_diff)->toBeTrue();
+
+ // One uppercase among lowercase - has differential
+ $sentiText = new SentiText('hello WORLD test');
+ expect($sentiText->is_cap_diff)->toBeTrue();
+ });
+
+ it('sets is_cap_diff property correctly', function () {
+ // All lowercase
+ $sentiText = new SentiText('hello world test');
+ expect($sentiText->is_cap_diff)->toBeFalse();
+
+ // All uppercase
+ $sentiText = new SentiText('HELLO WORLD TEST');
+ expect($sentiText->is_cap_diff)->toBeFalse();
+
+ // Mixed case
+ $sentiText = new SentiText('HELLO world TEST');
+ expect($sentiText->is_cap_diff)->toBeTrue();
+ });
+
+ it('handles punctuation list correctly', function () {
+ expect(SentiText::PUNC_LIST)->toBeArray();
+ expect(SentiText::PUNC_LIST)->toContain('.');
+ expect(SentiText::PUNC_LIST)->toContain('!');
+ expect(SentiText::PUNC_LIST)->toContain('?');
+ expect(SentiText::PUNC_LIST)->toContain('!!!');
+ expect(SentiText::PUNC_LIST)->toContain('???');
+ });
+
+ it('preserves emoticons when extracting words', function () {
+ $emoticons = [':)', ':(', ':D', ';)', ':/', ':P'];
+ $text = 'Hello :) this is good';
+
+ $sentiText = new SentiText($text);
+
+ // Check that emoticons are preserved (though they might be part of words)
+ expect($sentiText->words_and_emoticons)->toContain(':)');
+ });
+
+ it('handles contractions correctly', function () {
+ $contractions = [
+ "don't" => "don't",
+ "won't" => "won't",
+ "can't" => "can't",
+ "wouldn't" => "wouldn't",
+ ];
+
+ foreach ($contractions as $input => $expected) {
+ $sentiText = new SentiText($input);
+ expect($sentiText->words_and_emoticons)->toContain($expected);
+ }
+ });
+
+ it('filters out single letter words', function () {
+ $text = 'I a test of single letters x y z';
+ $sentiText = new SentiText($text);
+
+ // Single letters should be filtered out
+ expect($sentiText->words_and_emoticons)->not->toContain('I');
+ expect($sentiText->words_and_emoticons)->not->toContain('a');
+ expect($sentiText->words_and_emoticons)->not->toContain('x');
+ expect($sentiText->words_and_emoticons)->not->toContain('y');
+ expect($sentiText->words_and_emoticons)->not->toContain('z');
+
+ // Multi-letter words should remain
+ expect($sentiText->words_and_emoticons)->toContain('test');
+ expect($sentiText->words_and_emoticons)->toContain('of');
+ expect($sentiText->words_and_emoticons)->toContain('single');
+ expect($sentiText->words_and_emoticons)->toContain('letters');
+ });
+
+ it('handles empty and whitespace text', function () {
+ $emptyText = new SentiText('');
+ expect($emptyText->words_and_emoticons)->toBeArray();
+ expect($emptyText->words_and_emoticons)->toBeEmpty();
+
+ $whitespaceText = new SentiText(' ');
+ expect($whitespaceText->words_and_emoticons)->toBeArray();
+ expect($whitespaceText->words_and_emoticons)->toBeEmpty();
+ });
+
+ it('handles text with multiple punctuation marks', function () {
+ $text = 'Wow!!! Really??? That is good';
+ $sentiText = new SentiText($text);
+
+ expect($sentiText->words_and_emoticons)->toContain('Wow');
+ expect($sentiText->words_and_emoticons)->toContain('Really');
+ expect($sentiText->words_and_emoticons)->toContain('That');
+ expect($sentiText->words_and_emoticons)->toContain('is');
+ expect($sentiText->words_and_emoticons)->toContain('good');
+ });
+
+ it('preserves word order in words_and_emoticons', function () {
+ $text = 'First second third fourth';
+ $sentiText = new SentiText($text);
+
+ $words = $sentiText->words_and_emoticons;
+ expect($words[0])->toBe('First');
+ expect($words[1])->toBe('second');
+ expect($words[2])->toBe('third');
+ expect($words[3])->toBe('fourth');
+ });
+});