-
Notifications
You must be signed in to change notification settings - Fork 14
Nhs number generator improvements #121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| import random | ||
| import numpy as np | ||
| from numpy.random import default_rng | ||
|
|
||
|
|
||
| def nhsNumberValidator(number: int) -> bool: | ||
|
|
@@ -48,7 +48,7 @@ def nhsNumberValidator(number: int) -> bool: | |
|
|
||
| def nhsNumberGenerator(to_generate: int, random_state: int = None) -> list: | ||
| """ | ||
| Generates up to 1M random NHS numbers compliant with modulus 11 checks as recorded | ||
| Generates random NHS numbers compliant with modulus 11 checks as recorded | ||
| in the data dictionary. | ||
| https://www.datadictionary.nhs.uk/data_dictionary/attributes/n/nhs/nhs_number_de.asp?shownav=1 | ||
|
|
||
|
|
@@ -62,27 +62,40 @@ def nhsNumberGenerator(to_generate: int, random_state: int = None) -> list: | |
| Returns | ||
| ---------- | ||
| generated : list | ||
| List of randomly generated NHS numbers | ||
| List of randomly generated valid NHS numbers | ||
|
|
||
| Examples | ||
| --------- | ||
| >>> nhsNumberGenerator(2, random_state=42) | ||
| [8429141456, 2625792787] | ||
| [5065337063, 1104866676] | ||
| """ | ||
|
|
||
| if random_state: | ||
| random.seed(random_state) | ||
| if not isinstance(to_generate, int): | ||
| raise ValueError("Please input a positive integer to generate numbers.") | ||
| if to_generate > 1000000: | ||
| raise ValueError("More than one million values requested") | ||
| if to_generate < 0: | ||
| raise ValueError("Please input a postitive integer to generate numbers.") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| rng = default_rng(random_state) | ||
|
|
||
| generated = [] | ||
| while len(generated) < to_generate: | ||
| # Random 10 digit integer, starting with non-zero digit | ||
| number = random.randint(1000000000, 9999999999) | ||
| if nhsNumberValidator(number): | ||
| generated.append(number) | ||
| return generated | ||
| # The NHS numbers are generated in five stages. | ||
| # First, generate the middle 8 digits using the numpy Generator's integers method | ||
| # Second, generate the check digit portions for each block of 8 digits | ||
| # Third, generate 1 digit (the 1st digit) between 1 and 8 | ||
| # increase this value by 1 if it is at or above the value which would cause a check digit of 10 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| # be aware that this will not produce a fully uniform distribution over NHS numbers | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| # the distribution will not produce any NHS number with a leading digit (or check digit) of 1 where the | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| # contribution of the middle 8 digits to the check digit is 0 | ||
| # Fourth, generate the check digit from the above values | ||
| # Fifth, combine the digits into a number | ||
| base_number = rng.integers(0, 9, size=(to_generate, 8), dtype=np.int32) | ||
| check_digit_portion = np.vstack(np.dot(base_number, np.arange(9, 1, -1)) % 11) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| leading_candidate = rng.integers(1, 8, size=(to_generate, 1), dtype=np.int32) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
|
|
||
| # The resulting check digit is x_10 - k, where k is the contribution of the other digits | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| # Then the check digit would be 10 (invalid) if the leading digit were k+10 mod 11, or equivalently k-1 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| leading_digit = leading_candidate + (leading_candidate >= check_digit_portion - 1) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| check_digit = (leading_digit - check_digit_portion) % 11 | ||
| result_digits = np.hstack([leading_digit, base_number, check_digit]) | ||
|
|
||
| result = np.dot(result_digits, 10 ** np.arange(9, -1, -1, dtype=np.int64)) | ||
|
|
||
| return [int(val) for val in result] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Extra line required after line 101 |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,20 +5,32 @@ | |
|
|
||
| @pytest.mark.parametrize( | ||
| "to_generate, random_state, expected", | ||
| [(3, 42, [8429141456, 2625792787, 8235363119]), (2, 1, [9598980006, 6597925149])], | ||
| [(3, 42, [7065337065, 6104866670, 4417443181]), (2, 1, [6446801785, 4227327237])], | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| ) | ||
| def test_nhsNumberGenerator_BAU(to_generate, random_state, expected): | ||
| assert expected == nhsNumberGenerator(to_generate, random_state=random_state) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
|
|
||
|
|
||
| @pytest.mark.parametrize("to_generate", [4.2, 1000001, -1]) | ||
| @pytest.mark.parametrize( | ||
| "to_generate, random_state", [(100, 111), (100, None), (100, 999)], | ||
| ) | ||
| def test_nhsNumberGenerator_makesValid(to_generate, random_state): | ||
| assert all( | ||
| ( | ||
| nhsNumberValidator(val) | ||
| for val in nhsNumberGenerator(to_generate, random_state=random_state) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is too long, please reduce to 70 |
||
| ) | ||
| ) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("to_generate", [4.2, -1]) | ||
| def test_nhsNumberGenerator_valueErrors(to_generate): | ||
| with pytest.raises(ValueError): | ||
| nhsNumberGenerator(to_generate) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| "to_validate, expected", [(9598980006, True), (9598980007, False)] | ||
| "to_validate, expected", [(6771116069, True), (9598980007, False)] | ||
| ) | ||
| def test_nhsNumberValidator_BAU(to_validate, expected): | ||
| assert expected == nhsNumberValidator(to_validate) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Blank line required after line 24 |
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Line is too long, please reduce to 70