Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/introduction/finding_records.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and then merges data together from every element in the document to produce a si
Consider this simple document as an example::

>>> from chemdataextractor.doc import Document, Heading, Paragraph
>>> from chemdataextractor.model import Compound, MeltingPoint
>>> doc = Document(
Heading('5,10,15,20-Tetra(4-carboxyphenyl)porphyrin (3).'),
Paragraph('m.p. 90°C.'),
Expand Down
85 changes: 58 additions & 27 deletions examples/automatic_parsing_for_tables.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,22 @@
"name": "stdout",
"output_type": "stream",
"text": [
"+--------+--------------------------+------------------------------+\n",
"| Data | Row Categories | Column Categories |\n",
"+--------+--------------------------+------------------------------+\n",
"| 1100 | ['Inorganic', 'BiFeO3'] | ['Temperatures', 'Tc/K'] |\n",
"| 643 | ['Inorganic', 'BiFeO3'] | ['Temperatures', 'Tn/K'] |\n",
"| | ['Inorganic', 'BiFeO3'] | ['Magnetic moment', 'B [T]'] |\n",
"| 257 | ['Inorganic', ' LaCrO3'] | ['Temperatures', 'Tc/K'] |\n",
"| 150 | ['Inorganic', ' LaCrO3'] | ['Temperatures', 'Tn/K'] |\n",
"| 0.1 mT | ['Inorganic', ' LaCrO3'] | ['Magnetic moment', 'B [T]'] |\n",
"| | ['Organic', 'LaCrO2'] | ['Temperatures', 'Tc/K'] |\n",
"| 10 | ['Organic', 'LaCrO2'] | ['Temperatures', 'Tn/K'] |\n",
"| 500 | ['Organic', 'LaCrO2'] | ['Magnetic moment', 'B [T]'] |\n",
"| | ['Inorganic', 'Gd'] | ['Temperatures', 'Tc/K'] |\n",
"| 294 | ['Inorganic', 'Gd'] | ['Temperatures', 'Tn/K'] |\n",
"| 659 T | ['Inorganic', 'Gd'] | ['Magnetic moment', 'B [T]'] |\n",
"+--------+--------------------------+------------------------------+\n"
"+---------+--------------------------+------------------------------+\n",
"| Data | Row Categories | Column Categories |\n",
"+---------+--------------------------+------------------------------+\n",
"| 1100 | ['Inorganic', 'BiFeO3'] | ['Temperatures', 'Tc/K'] |\n",
"| 643 | ['Inorganic', 'BiFeO3'] | ['Temperatures', 'Tn/K'] |\n",
"| NoValue | ['Inorganic', 'BiFeO3'] | ['Magnetic moment', 'B [T]'] |\n",
"| 257 | ['Inorganic', ' LaCrO3'] | ['Temperatures', 'Tc/K'] |\n",
"| 150 | ['Inorganic', ' LaCrO3'] | ['Temperatures', 'Tn/K'] |\n",
"| 0.1 mT | ['Inorganic', ' LaCrO3'] | ['Magnetic moment', 'B [T]'] |\n",
"| NoValue | ['Organic', 'LaCrO2'] | ['Temperatures', 'Tc/K'] |\n",
"| 10 | ['Organic', 'LaCrO2'] | ['Temperatures', 'Tn/K'] |\n",
"| 500 | ['Organic', 'LaCrO2'] | ['Magnetic moment', 'B [T]'] |\n",
"| NoValue | ['Inorganic', 'Gd'] | ['Temperatures', 'Tc/K'] |\n",
"| 294 | ['Inorganic', 'Gd'] | ['Temperatures', 'Tn/K'] |\n",
"| 659 T | ['Inorganic', 'Gd'] | ['Magnetic moment', 'B [T]'] |\n",
"+---------+--------------------------+------------------------------+\n"
]
},
{
Expand All @@ -53,7 +53,7 @@
"from chemdataextractor.doc.table import Table\n",
"from chemdataextractor.doc import Caption\n",
"\n",
"path = \"./example_tables/table_example_tkt_2.csv\"\n",
"path = \"./data/table_example.csv\"\n",
"table = Table(caption=Caption(\"\"),table_data=path)\n",
"\n",
"print(table.tde_table)\n",
Expand Down Expand Up @@ -104,15 +104,25 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Initialising AllenNLP model ✔ \n",
"{'Compound': {'names': ['BiFeO3']}}\n",
"{'Compound': {'names': ['LaCrO3']}}\n",
"{'Compound': {'names': ['LaCrO2']}}\n",
"{'Compound': {'names': ['Gd']}}\n",
"{'CurieTemperature': {'raw_value': '1100', 'raw_units': 'K', 'value': [1100.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['BiFeO3']}}}}\n",
"{'CurieTemperature': {'raw_value': '643', 'value': [643.0], 'compound': {'Compound': {'names': ['BiFeO3']}}}}\n",
"{'CurieTemperature': {'raw_value': '257', 'raw_units': 'K', 'value': [257.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['LaCrO3']}}}}\n",
"{'CurieTemperature': {'raw_value': '150', 'value': [150.0], 'compound': {'Compound': {'names': ['LaCrO3']}}}}\n",
"{'CurieTemperature': {'raw_value': '0.1', 'value': [0.1], 'compound': {'Compound': {'names': ['LaCrO3']}}}}\n",
"{'CurieTemperature': {'raw_value': '10', 'raw_units': 'K', 'value': [10.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['LaCrO2']}}}}\n",
"{'CurieTemperature': {'raw_value': '294', 'raw_units': 'K', 'value': [294.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['Gd']}}}}\n"
"{'CurieTemperature': {'raw_value': '500', 'raw_units': 'K', 'value': [500.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['LaCrO2']}}}}\n",
"{'CurieTemperature': {'raw_value': '294', 'raw_units': 'K', 'value': [294.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['Gd']}}}}\n",
"{'CurieTemperature': {'raw_value': '659', 'raw_units': 'K', 'value': [659.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['Gd']}}}}\n"
]
}
],
"source": [
"table.models = [CurieTemperature]\n",
"table = Table(caption=Caption(\"\"),table_data=path, models=[CurieTemperature])\n",
"for record in table.records:\n",
" print(record.serialize())"
]
Expand All @@ -130,16 +140,26 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Compound': {'names': ['BiFeO3']}}\n",
"{'Compound': {'names': ['LaCrO3']}}\n",
"{'Compound': {'names': ['LaCrO2']}}\n",
"{'Compound': {'names': ['Gd']}}\n",
"{'CurieTemperature': {'raw_value': '1100', 'raw_units': 'K', 'value': [1100.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['BiFeO3']}}, 'label': 'Inorganic'}}\n",
"{'CurieTemperature': {'raw_value': '643', 'value': [643.0], 'compound': {'Compound': {'names': ['BiFeO3']}}, 'label': 'Inorganic'}}\n",
"{'CurieTemperature': {'raw_value': '257', 'raw_units': 'K', 'value': [257.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['LaCrO3']}}, 'label': 'Inorganic'}}\n",
"{'CurieTemperature': {'raw_value': '10', 'raw_units': 'K', 'value': [10.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['LaCrO2']}}}}\n"
"{'CurieTemperature': {'raw_value': '150', 'value': [150.0], 'compound': {'Compound': {'names': ['LaCrO3']}}, 'label': 'Inorganic'}}\n",
"{'CurieTemperature': {'raw_value': '0.1', 'value': [0.1], 'compound': {'Compound': {'names': ['LaCrO3']}}, 'label': 'Inorganic'}}\n",
"{'CurieTemperature': {'raw_value': '10', 'raw_units': 'K', 'value': [10.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['LaCrO2']}}}}\n",
"{'CurieTemperature': {'raw_value': '500', 'raw_units': 'K', 'value': [500.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['LaCrO2']}}}}\n",
"{'CurieTemperature': {'raw_value': '294', 'raw_units': 'K', 'value': [294.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['Gd']}}, 'label': 'Inorganic'}}\n",
"{'CurieTemperature': {'raw_value': '659', 'raw_units': 'K', 'value': [659.0], 'units': 'Kelvin^(1.0)', 'specifier': 'Tc', 'compound': {'Compound': {'names': ['Gd']}}, 'label': 'Inorganic'}}\n"
]
}
],
Expand All @@ -150,24 +170,35 @@
" compound = ModelType(Compound, required=True, contextual=True)\n",
" label = StringType(parse_expression=I('inorganic'))\n",
" \n",
"table.models = [CurieTemperature]\n",
"table = Table(caption=Caption(\"\"),table_data=path, models=[CurieTemperature])\n",
"for record in table.records:\n",
" print(record.serialize())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Compound': {'names': ['BiFeO3']}}\n",
"{'Compound': {'names': ['LaCrO3']}}\n",
"{'Compound': {'names': ['LaCrO2']}}\n",
"{'Compound': {'names': ['Gd']}}\n"
]
}
],
"source": [
"class CurieTemperature(TemperatureModel):\n",
" StringType(parse_expression=I('TC'), required=True, contextual=True, updatable=True)\n",
" specifier = StringType(parse_expression=I('TC'), required=True, contextual=True, updatable=True)\n",
" compound = ModelType(Compound, required=True, contextual=True)\n",
" label = StringType(parse_expression=I('something else'), required=True)\n",
" \n",
"table.models = [CurieTemperature]\n",
"table = Table(caption=Caption(\"\"),table_data=path, models=[CurieTemperature])\n",
"for record in table.records:\n",
" print(record.serialize())"
]
Expand All @@ -182,7 +213,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -196,7 +227,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.8.16"
}
},
"nbformat": 4,
Expand Down