diff --git a/lib/mw_dictionary_api/parsers/definition_parser.rb b/lib/mw_dictionary_api/parsers/definition_parser.rb
index bd59a3d..943aefa 100644
--- a/lib/mw_dictionary_api/parsers/definition_parser.rb
+++ b/lib/mw_dictionary_api/parsers/definition_parser.rb
@@ -26,6 +26,10 @@ class DefinitionParser
end
end
+ rule :sense_divider do |data, opts| # content of data[:sense_divider]; nil when the key is absent
+   (divider = data[:sense_divider]) && divider.content
+ end
+
rule :text do |data, opts|
dt_without_vi = data[:dt].dup
if dt_without_vi.respond_to? :css
diff --git a/lib/mw_dictionary_api/parsers/entry_parser.rb b/lib/mw_dictionary_api/parsers/entry_parser.rb
index 9faabb2..c5fc9d2 100644
--- a/lib/mw_dictionary_api/parsers/entry_parser.rb
+++ b/lib/mw_dictionary_api/parsers/entry_parser.rb
@@ -40,10 +40,11 @@ class EntryParser
rule :definitions do |data, opts|
nodes = data.xpath("def//sn | def//dt")
+ sd = nil
# first step we will add dummy nodes if the list of nodes is not
# strictly sn/dt pairs
- nodes = add_dumy_nodes(nodes) if nodes.count % 2 != 0
+ nodes = add_dummy_nodes(nodes)
# data.xpath("def//sn | def//dt")
nodes.each_slice(2).inject([]) do |definitions, nodes|
@@ -57,6 +58,7 @@ class EntryParser
end
hash = Hash[names.zip(values)]
hash[:prev_sn] = definitions[-1][:sense_number] if definitions[-1]
+ hash[:sense_divider] = sd if sd = previous_sense_divider(nodes[1])
definitions << DefinitionParser.new(parser_options(opts)).parse(hash)
end
@@ -87,6 +89,18 @@ class EntryParser
inflections
end
+ # One hash per <uro> child of the entry node; a field is nil when its tag is missing.
+ rule :undefined_run_ons do |data, opts|
+   data.xpath("uro").map do |uro_node|
+     {
+       entry: parse_entity(uro_node, "ure"),
+       sound: parse_entity(uro_node, "sound wav"),
+       pronunciation: parse_entity(uro_node, "pr"),
+       part_of_speech: parse_entity(uro_node, "fl")
+     }
+   end
+ end
+
rule_helpers do
def parser_options(opts)
{ api_type: opts[:api_type], response_format: opts[:response_format] }
@@ -96,7 +110,15 @@ def parse_entity(data, tag)
data.at_css(tag).content if data.at_css(tag)
end
- def add_dumy_nodes(nodes)
+ # Returns the element directly before +node+ when it is an <sd> tag.
+ # Returns nil when the preceding element is something else or when +node+
+ # has no preceding element at all.
+ def previous_sense_divider(node)
+   preceding = node.previous_element
+   preceding if preceding && preceding.name == 'sd'
+ end
+
+ def add_dummy_nodes(nodes)
temp = []
previous_sense_number = nil
nodes.each do |node|
diff --git a/spec/fixtures/insouciance_collegiate.xml b/spec/fixtures/insouciance_collegiate.xml
new file mode 100644
index 0000000..2606b5a
--- /dev/null
+++ b/spec/fixtures/insouciance_collegiate.xml
@@ -0,0 +1,4 @@
+
+
+ insouciancein*sou*ci*anceinsouc02.wavin-!sU-sE-un(t)sinsouc01.wava~-sUs-y@~sin-ˈsü-sē-ən(t)s, aⁿ-süs-ˈyäⁿsnounFrench, from in- + soucier to trouble, disturb, from Old French, from Latin sollicitare solicit1799:lighthearted unconcern :nonchalancein*sou*ci*antinsouc03.wavin-!sU-sE-unt in-ˈsü-sē-ənt, aⁿ-süs-yäⁿ adjectivein*sou*ci*ant*lyinsouc04.wavin-!sU-sE-unt-lE in-ˈsü-sē-ənt-lē adverb
+
\ No newline at end of file
diff --git a/spec/fixtures/scant_collegiate.xml b/spec/fixtures/scant_collegiate.xml
new file mode 100644
index 0000000..dfe1f5a
--- /dev/null
+++ b/spec/fixtures/scant_collegiate.xml
@@ -0,0 +1,93 @@
+
+
+
+ scant
+ DI-1
+ scant
+
+ scant001.wav
+ !skant
+
+ ˈskant
+ adjective
+ Middle English, from Old Norse
+ skamt,
+ neuter of
+ skammr
+ short
+
+ 14th century
+ 1
+ dialect
+ a
+ :excessively frugal
+ b
+ :not prodigal :
+ chary
+ 2 a
+ :barely or scarcely sufficient
+ especially
+ :not quite coming up to a stated measure
+
+ a
+ scant
+ teaspoon
+
+ b
+ :lacking in amplitude or quantity
+
+ scant
+ growth
+
+ 3
+ :having a small or insufficient supply
+
+ he's fat, and
+ scant
+ of breath
+ Shakespeare
+
+ meager
+
+
+ scant*ly
+ adverb
+
+
+ scant*ness
+ noun
+
+
+
+ scant
+ DI-1
+ scant
+ adverb
+
+ 15th century
+ dialect
+ :
+ scarcely
+ hardly
+
+
+
+ scant
+ scant
+ verb
+
+ transitive verb
+ circa 1580
+ 1
+ :to provide an incomplete supply of
+ 2
+ :to make small, narrow, or meager
+ 3
+ :to give scant attention to :
+ slight
+ 4
+ :to provide with a meager or inadequate portion or supply :
+ stint
+
+
+
\ No newline at end of file
diff --git a/spec/lib/mw_dictionary_api/parsers/entry_parser_spec.rb b/spec/lib/mw_dictionary_api/parsers/entry_parser_spec.rb
index e07d6a5..cee2c4d 100644
--- a/spec/lib/mw_dictionary_api/parsers/entry_parser_spec.rb
+++ b/spec/lib/mw_dictionary_api/parsers/entry_parser_spec.rb
@@ -18,6 +18,9 @@ module Parsers
let(:one_collegiate_entry) { one_collegiate_xml_doc.at_css("entry") }
let(:shrift_collegiate_entry) { Nokogiri::XML(File.open(fixture_path('shrift_collegiate.xml')).read).at_css("entry") }
+ let(:scant_collegiate_entry) { Nokogiri::XML(File.read(fixture_path('scant_collegiate.xml'))).at_css("entry") }
+ # File.read closes the fixture file itself; File.open(...).read would leave the handle open until GC.
+ let(:insouciance_entry) { Nokogiri::XML(File.read(fixture_path('insouciance_collegiate.xml'))).at_css("entry") }
let(:parser) { EntryParser.new }
@@ -96,11 +99,41 @@ def parse(data)
end
describe "definitions" do
- it 'returns a list of definition pairs' do
- definitions = parse(shrift_collegiate_entry)[:definitions]
- expect(definitions.count).to eq 4
+ context "when there's an odd number of sense/definition pairs" do # exercised with the shrift fixture
+ it 'returns a list of definition pairs' do
+ definitions = parse(shrift_collegiate_entry)[:definitions]
+ expect(definitions.count).to eq 4
+ end
+ end
+
+ context "when there's a mismatched set of sense/definition pairs" do # exercised with the scant fixture
+ it 'returns a list of definition pairs' do
+ definitions = parse(scant_collegiate_entry)[:definitions]
+ expect(definitions.count).to eq 7 # NOTE(review): presumably includes pairs completed by dummy nodes; confirm against add_dummy_nodes
+ end
+ end
+
+ it 'identifies sense dividers in adjacent definitions' do
+ definitions = parse(scant_collegiate_entry)[:definitions]
+ expect(definitions[4][:sense_divider]).to eq 'especially' # index 4 is the fifth parsed definition
end
end
+
+ describe "undefined_run_ons" do
+   let(:undefined_run_ons) { parse(one_entry1)[:undefined_run_ons] }
+   let(:insouciance_uros) { parse(insouciance_entry)[:undefined_run_ons] }
+
+   # One example covers both an entry yielding no run-ons and one yielding two.
+   it "returns a list of run_ons if available" do
+     insouciant = { entry: "in*sou*ci*ant", sound: "insouc03.wav",
+                    pronunciation: "in-ˈsü-sē-ənt, aⁿ-süs-yäⁿ", part_of_speech: "adjective" }
+     insouciantly = { entry: "in*sou*ci*ant*ly", sound: "insouc04.wav",
+                      pronunciation: "in-ˈsü-sē-ənt-lē", part_of_speech: "adverb" }
+
+     expect(undefined_run_ons).to be_empty
+     expect(insouciance_uros).to eq([insouciant, insouciantly])
+   end
+ end
end
end
end