From 79bebb093d9cf994e4982630f8d9fcf0a0ef7164 Mon Sep 17 00:00:00 2001 From: Allen Tullett Date: Thu, 27 Mar 2025 16:33:50 +0000 Subject: [PATCH 1/5] Updated the parse function to correctly validate classmarks --- src/Classmark.php | 70 +++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/src/Classmark.php b/src/Classmark.php index fe45c96..00842f3 100644 --- a/src/Classmark.php +++ b/src/Classmark.php @@ -56,41 +56,63 @@ public function __construct($subject, $subdivision = '', $author = '', $prefix = */ public static function parse($classmark) { + // Validate the classmark. if (!is_string($classmark) || !preg_match('/^([a-z\ ]*[A-Z]{1,2}[A-Za-z0-9\.\ ]*)$/', $classmark)) { throw new \InvalidArgumentException('Invalid classmark provided for parse.'); } // Setup our variables. - $author = ''; - $prefix = ''; $subject = ''; - $subdivision = ''; - $classmark = trim($classmark); - - // Strip any lower-case characters from the start of the string. - if (preg_match('/^([a-z]*)/', $classmark, $matches)) { - $prefix = $matches[1]; - $classmark = trim(substr($classmark, strlen($prefix))); + $sub_subject = ''; + + // Check the first character and if it is a lowercase 'f' or 'q' then remove it + if (strlen($classmark) > 1 && preg_match('/(^f|^q)/', $classmark)) { + $classmark = substr($classmark, 1); } - - // Strip any 3 lower case characters from the end of the string - // if separated by a space. - if (preg_match('/\ ([a-z]{3})$/', $classmark, $matches)) { - $author = $matches[1]; - $classmark = trim(substr($classmark, 0, strpos($classmark, $author))); + + // Uppercase our $classmark string + $classmark = strtoupper($classmark); + + // Check first 2 characters and if they are 'FF' or 'QQ' then remove the first character + // This relates to when there is a 'f' (for folio) or 'q' (for quarto) at the start of the classmark. We will never have a classmark that has 2 'f's or 2 'q's at the start. + if (preg_match('/^FF/', $classmark) || preg_match('/^QQ/', $classmark)) { + $classmark = substr($classmark, 1); } - - // Strip off the first set of uppercase alpha characters. - if (preg_match('/(^[A-Z]{1,4})/', $classmark, $matches)) { - $subject = $matches[1]; - $classmark = trim(substr($classmark, strlen($subject))); + + // Check if the string has 1 or 2 alpha characters at the start and then store as $subject + if (preg_match('/^[A-Z][A-Z]/', $classmark)) { + $subject = substr($classmark, 0, 2); + } else { + $subject = substr($classmark, 0, 1); } + + // Trim these characters from the $classmark string and store as $sub_subject + $sub_subject = trim(substr($classmark, strlen($subject))); + + // If first character is a decimal or space remove it + if (preg_match('/(^\.|^\s)/', $sub_subject)) { + $sub_subject = trim(substr($sub_subject, 1)); + } + + // Find index of first space + preg_match('/\s/', $sub_subject, $matches, PREG_OFFSET_CAPTURE); + if (!empty($matches)) { + // Remove all characters from that index to the end of the string + $index = $matches[0][1]; + $sub_subject = substr($sub_subject, 0, $index); + } + + // Check last character for a decimal or space + preg_match('/(\.|\s)$/', $sub_subject, $matches, PREG_OFFSET_CAPTURE); + if (!empty($matches)) { + $index = $matches[0][1]; + // If it is a space or a decimal, remove it + $sub_subject = substr($sub_subject, 0, $index); + } + + return new static($subject . ' ' . $sub_subject); - // Subdivision is whatever is left. - $subdivision = $classmark; - - return new static($subject, $subdivision, $author, $prefix); } /** From 05b9a5838c5d9d1b3c752940e213ac3412038ddf Mon Sep 17 00:00:00 2001 From: Allen Tullett Date: Tue, 8 Apr 2025 16:09:48 +0100 Subject: [PATCH 2/5] Rewritten parse function This was following discussions with Stewart and Simone about what is a valid Classmark. --- src/Classmark.php | 99 +++++++++++++++++++++++++++++++---------------- 1 file changed, 66 insertions(+), 33 deletions(-) diff --git a/src/Classmark.php b/src/Classmark.php index 00842f3..69ebdeb 100644 --- a/src/Classmark.php +++ b/src/Classmark.php @@ -56,46 +56,78 @@ public function __construct($subject, $subdivision = '', $author = '', $prefix = */ public static function parse($classmark) { - - // Validate the classmark. - if (!is_string($classmark) || !preg_match('/^([a-z\ ]*[A-Z]{1,2}[A-Za-z0-9\.\ ]*)$/', $classmark)) { - throw new \InvalidArgumentException('Invalid classmark provided for parse.'); + + // Validate the classmark + if($classmark == null || $classmark == '' || empty($classmark)) { + throw new \InvalidArgumentException('Invalid classmark provided for parse - No value provided;'); } - + // Setup our variables. - $subject = ''; - $sub_subject = ''; + $subject = null; + $sub_subject = null; + + $classmark = strtoupper($classmark); // Uppercase our $classmark string - // Check the first character and if it is a lowercase 'f' or 'q' then remove it - if (strlen($classmark) > 1 && preg_match('/(^f|^q)/', $classmark)) { - $classmark = substr($classmark, 1); + // Does the classmark start with a number? + if (is_numeric(substr($classmark, 0, 1))) { + throw new \InvalidArgumentException('Invalid classmark provided for parse - Value beings with a number (' . $classmark . ');'); } - // Uppercase our $classmark string - $classmark = strtoupper($classmark); + // PREFIX + // ---------------------------------------------------------------------------------- + + // If the string start with 'FOL', 'LRG', or 'PER' + if(substr($classmark, 0, 3) == 'FOL' || substr($classmark, 0, 3) == 'LRG' || substr($classmark, 0, 3) == 'PER') { + + // If yes - Remove the prefix + $classmark = substr($classmark, 3); // Remove the prefix + + // Check if first character is a space + if(substr($classmark, 0, 1) == ' ') { + $classmark = substr($classmark, 1); // Remove the space + } - // Check first 2 characters and if they are 'FF' or 'QQ' then remove the first character - // This relates to when there is a 'f' (for folio) or 'q' (for quarto) at the start of the classmark. We will never have a classmark that has 2 'f's or 2 'q's at the start. - if (preg_match('/^FF/', $classmark) || preg_match('/^QQ/', $classmark)) { - $classmark = substr($classmark, 1); } - - // Check if the string has 1 or 2 alpha characters at the start and then store as $subject - if (preg_match('/^[A-Z][A-Z]/', $classmark)) { - $subject = substr($classmark, 0, 2); - } else { - $subject = substr($classmark, 0, 1); + + // If there is single prefix letter ('F', 'L', 'P', or 'Q') followed by a space / decimal and another letter (If followed by a number then its is valid) + $prefixes = ['F', 'L', 'P', 'Q']; + if (in_array(substr($classmark, 0, 1), $prefixes) && in_array($classmark[1], [' ', '.'])) { + $classmark = substr($classmark, 2); // Remove the character and the space / decimal + } + + // Check if the first two characters are either 'ff', 'll', 'pp', or 'qq' + if (preg_match('/^ff|^ll|^pp|^qq/i', $classmark)) { + // If yes - Remove the first character and continue + $classmark = substr($classmark, 1); // Remove the first character } - // Trim these characters from the $classmark string and store as $sub_subject + // How many letters are there at the start? + $number_index = 0; // Default value + if (preg_match('/\d/', $classmark, $matches, PREG_OFFSET_CAPTURE)) { + $number_index = $matches[0][1]; + } + + // SUBJECTS + // ---------------------------------------------------------------------------------- + + $subject = substr($classmark, 0, $number_index); $sub_subject = trim(substr($classmark, strlen($subject))); - // If first character is a decimal or space remove it - if (preg_match('/(^\.|^\s)/', $sub_subject)) { - $sub_subject = trim(substr($sub_subject, 1)); + // Find and remove any decimal or space on the subject value + if (preg_match('/(\.|\s)/', $subject, $matches, PREG_OFFSET_CAPTURE)) { + $subject = str_replace([' ','.'], '', $subject); } - // Find index of first space + // Check if first letter is a valid classmark + // If character is I, O, W, or X = Return false (Invalid classmark) + if (preg_match('/^[IOWX]/', $subject)) { + throw new \InvalidArgumentException('Invalid classmark provided for parse - First subject letter ' . $subject[0] . ' is invalid (' . $classmark . ');'); + } + + // SUB-SUBJECTS + // ---------------------------------------------------------------------------------- + + // Search for first single space in the remaining string preg_match('/\s/', $sub_subject, $matches, PREG_OFFSET_CAPTURE); if (!empty($matches)) { // Remove all characters from that index to the end of the string @@ -103,14 +135,15 @@ public static function parse($classmark) $sub_subject = substr($sub_subject, 0, $index); } - // Check last character for a decimal or space - preg_match('/(\.|\s)$/', $sub_subject, $matches, PREG_OFFSET_CAPTURE); - if (!empty($matches)) { - $index = $matches[0][1]; - // If it is a space or a decimal, remove it - $sub_subject = substr($sub_subject, 0, $index); + // Check if first character is a space or decimal + if (preg_match('/^(\.|\s)/', $sub_subject)) { + $sub_subject = substr($sub_subject, 1); // Remove the character } + if (empty($subject) || empty($sub_subject)) { + throw new \InvalidArgumentException('Invalid classmark provided for parse - Parsing returned an empty value for subject and/or sub-subject (' . $classmark . ');'); + } + return new static($subject . ' ' . $sub_subject); } From a09fe4f1cd37d2ee94b55d9533f17f5b6c7b454f Mon Sep 17 00:00:00 2001 From: Allen Tullett Date: Tue, 8 Apr 2025 16:12:39 +0100 Subject: [PATCH 3/5] Update Classmark.php --- src/Classmark.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Classmark.php b/src/Classmark.php index 69ebdeb..49870d2 100644 --- a/src/Classmark.php +++ b/src/Classmark.php @@ -70,7 +70,7 @@ public static function parse($classmark) // Does the classmark start with a number? if (is_numeric(substr($classmark, 0, 1))) { - throw new \InvalidArgumentException('Invalid classmark provided for parse - Value beings with a number (' . $classmark . ');'); + throw new \InvalidArgumentException('Invalid classmark provided for parse - Value begins with a number (' . $classmark . ');'); } // PREFIX From ebe1f5dee1f50ffb29885f04b41db4c40a8b7e65 Mon Sep 17 00:00:00 2001 From: Allen Tullett Date: Tue, 24 Jun 2025 13:38:12 +0100 Subject: [PATCH 4/5] Commented out prefix checks --- src/Classmark.php | 58 ++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/src/Classmark.php b/src/Classmark.php index 49870d2..9c2a63e 100644 --- a/src/Classmark.php +++ b/src/Classmark.php @@ -75,37 +75,39 @@ public static function parse($classmark) // PREFIX // ---------------------------------------------------------------------------------- + + // Commented out on 2023-10-30 as it may not be relevant going forward. - // If the string start with 'FOL', 'LRG', or 'PER' - if(substr($classmark, 0, 3) == 'FOL' || substr($classmark, 0, 3) == 'LRG' || substr($classmark, 0, 3) == 'PER') { + // // If the string start with 'FOL', 'LRG', or 'PER' + // if(substr($classmark, 0, 3) == 'FOL' || substr($classmark, 0, 3) == 'LRG' || substr($classmark, 0, 3) == 'PER') { - // If yes - Remove the prefix - $classmark = substr($classmark, 3); // Remove the prefix + // // If yes - Remove the prefix + // $classmark = substr($classmark, 3); // Remove the prefix - // Check if first character is a space - if(substr($classmark, 0, 1) == ' ') { - $classmark = substr($classmark, 1); // Remove the space - } - - } - - // If there is single prefix letter ('F', 'L', 'P', or 'Q') followed by a space / decimal and another letter (If followed by a number then its is valid) - $prefixes = ['F', 'L', 'P', 'Q']; - if (in_array(substr($classmark, 0, 1), $prefixes) && in_array($classmark[1], [' ', '.'])) { - $classmark = substr($classmark, 2); // Remove the character and the space / decimal - } - - // Check if the first two characters are either 'ff', 'll', 'pp', or 'qq' - if (preg_match('/^ff|^ll|^pp|^qq/i', $classmark)) { - // If yes - Remove the first character and continue - $classmark = substr($classmark, 1); // Remove the first character - } - - // How many letters are there at the start? - $number_index = 0; // Default value - if (preg_match('/\d/', $classmark, $matches, PREG_OFFSET_CAPTURE)) { - $number_index = $matches[0][1]; - } + // // Check if first character is a space + // if(substr($classmark, 0, 1) == ' ') { + // $classmark = substr($classmark, 1); // Remove the space + // } + + // } + + // // If there is single prefix letter ('F', 'L', 'P', or 'Q') followed by a space / decimal and another letter (If followed by a number then its is valid) + // $prefixes = ['F', 'L', 'P', 'Q']; + // if (in_array(substr($classmark, 0, 1), $prefixes) && in_array($classmark[1], [' ', '.'])) { + // $classmark = substr($classmark, 2); // Remove the character and the space / decimal + // } + + // // Check if the first two characters are either 'ff', 'll', 'pp', or 'qq' + // if (preg_match('/^ff|^ll|^pp|^qq/i', $classmark)) { + // // If yes - Remove the first character and continue + // $classmark = substr($classmark, 1); // Remove the first character + // } + + // // How many letters are there at the start? + // $number_index = 0; // Default value + // if (preg_match('/\d/', $classmark, $matches, PREG_OFFSET_CAPTURE)) { + // $number_index = $matches[0][1]; + // } // SUBJECTS // ---------------------------------------------------------------------------------- From a00aead97b1a2c10a56fc5f7699eb566002d8536 Mon Sep 17 00:00:00 2001 From: Allen Tullett Date: Thu, 3 Jul 2025 11:08:42 +0100 Subject: [PATCH 5/5] Reverted back to $subdivision variable name and removed redundant references to authors and prefixes --- src/Classmark.php | 90 ++++++----------------------------------------- 1 file changed, 11 insertions(+), 79 deletions(-) diff --git a/src/Classmark.php b/src/Classmark.php index 9c2a63e..6db366c 100644 --- a/src/Classmark.php +++ b/src/Classmark.php @@ -26,29 +26,13 @@ class Classmark */ private $subdivision; - /** - * Classmark author. - * - * @var string - */ - private $author; - - /** - * Classmark prefix. - * - * @var string - */ - private $prefix; - /** * Construct a new classmark object. */ - public function __construct($subject, $subdivision = '', $author = '', $prefix = '') + public function __construct($subject, $subdivision = '') { $this->subject = $subject; $this->subdivision = $subdivision; - $this->author = $author; - $this->prefix = $prefix; } /** @@ -64,7 +48,7 @@ public static function parse($classmark) // Setup our variables. $subject = null; - $sub_subject = null; + $subdivision = null; $classmark = strtoupper($classmark); // Uppercase our $classmark string @@ -73,47 +57,11 @@ public static function parse($classmark) throw new \InvalidArgumentException('Invalid classmark provided for parse - Value begins with a number (' . $classmark . ');'); } - // PREFIX - // ---------------------------------------------------------------------------------- - - // Commented out on 2023-10-30 as it may not be relevant going forward. - - // // If the string start with 'FOL', 'LRG', or 'PER' - // if(substr($classmark, 0, 3) == 'FOL' || substr($classmark, 0, 3) == 'LRG' || substr($classmark, 0, 3) == 'PER') { - - // // If yes - Remove the prefix - // $classmark = substr($classmark, 3); // Remove the prefix - - // // Check if first character is a space - // if(substr($classmark, 0, 1) == ' ') { - // $classmark = substr($classmark, 1); // Remove the space - // } - - // } - - // // If there is single prefix letter ('F', 'L', 'P', or 'Q') followed by a space / decimal and another letter (If followed by a number then its is valid) - // $prefixes = ['F', 'L', 'P', 'Q']; - // if (in_array(substr($classmark, 0, 1), $prefixes) && in_array($classmark[1], [' ', '.'])) { - // $classmark = substr($classmark, 2); // Remove the character and the space / decimal - // } - - // // Check if the first two characters are either 'ff', 'll', 'pp', or 'qq' - // if (preg_match('/^ff|^ll|^pp|^qq/i', $classmark)) { - // // If yes - Remove the first character and continue - // $classmark = substr($classmark, 1); // Remove the first character - // } - - // // How many letters are there at the start? - // $number_index = 0; // Default value - // if (preg_match('/\d/', $classmark, $matches, PREG_OFFSET_CAPTURE)) { - // $number_index = $matches[0][1]; - // } - // SUBJECTS // ---------------------------------------------------------------------------------- $subject = substr($classmark, 0, $number_index); - $sub_subject = trim(substr($classmark, strlen($subject))); + $subdivision = trim(substr($classmark, strlen($subject))); // Find and remove any decimal or space on the subject value if (preg_match('/(\.|\s)/', $subject, $matches, PREG_OFFSET_CAPTURE)) { @@ -126,44 +74,28 @@ public static function parse($classmark) throw new \InvalidArgumentException('Invalid classmark provided for parse - First subject letter ' . $subject[0] . ' is invalid (' . $classmark . ');'); } - // SUB-SUBJECTS + // SUBDIVISIONS // ---------------------------------------------------------------------------------- // Search for first single space in the remaining string - preg_match('/\s/', $sub_subject, $matches, PREG_OFFSET_CAPTURE); + preg_match('/\s/', $subdivision, $matches, PREG_OFFSET_CAPTURE); if (!empty($matches)) { // Remove all characters from that index to the end of the string $index = $matches[0][1]; - $sub_subject = substr($sub_subject, 0, $index); + $subdivision = substr($subdivision, 0, $index); } // Check if first character is a space or decimal - if (preg_match('/^(\.|\s)/', $sub_subject)) { - $sub_subject = substr($sub_subject, 1); // Remove the character + if (preg_match('/^(\.|\s)/', $subdivision)) { + $subdivision = substr($subdivision, 1); // Remove the character } - if (empty($subject) || empty($sub_subject)) { + if (empty($subject) || empty($subdivision)) { throw new \InvalidArgumentException('Invalid classmark provided for parse - Parsing returned an empty value for subject and/or sub-subject (' . $classmark . ');'); } - return new static($subject . ' ' . $sub_subject); - - } - - /** - * Return the author. - */ - public function get_author() - { - return $this->author; - } + return new static($subject, $subdivision); - /** - * Return the prefix. - */ - public function get_prefix() - { - return $this->prefix; } /** @@ -223,6 +155,6 @@ public function compareTo($classmark) */ public function __toString() { - return trim("{$this->prefix}{$this->subject}{$this->subdivision}{$this->author}"); + return trim("{$this->subject}{$this->subdivision}"); } }