Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lib/mw_dictionary_api/parsers/definition_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ class DefinitionParser
end
end

# Yields the text content of the :sense_divider value carried in the
# definition data, or a falsy value when no sense divider is present.
rule :sense_divider do |data, opts|
  divider = data[:sense_divider]
  divider && divider.content
end

rule :text do |data, opts|
dt_without_vi = data[:dt].dup
if dt_without_vi.respond_to? :css
Expand Down
26 changes: 24 additions & 2 deletions lib/mw_dictionary_api/parsers/entry_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ class EntryParser

rule :definitions do |data, opts|
nodes = data.xpath("def//sn | def//dt")
sd = nil

# First, pad the node list with dummy nodes wherever it is not made up
# strictly of sn/dt pairs.
nodes = add_dumy_nodes(nodes) if nodes.count % 2 != 0
nodes = add_dummy_nodes(nodes)

# data.xpath("def//sn | def//dt")
nodes.each_slice(2).inject([]) do |definitions, nodes|
Expand All @@ -57,6 +58,7 @@ class EntryParser
end
hash = Hash[names.zip(values)]
hash[:prev_sn] = definitions[-1][:sense_number] if definitions[-1]
hash[:sense_divider] = sd if sd = previous_sense_divider(nodes[1])
definitions << DefinitionParser.new(parser_options(opts)).parse(hash)
end

Expand Down Expand Up @@ -87,6 +89,18 @@ class EntryParser
inflections
end

# Builds one hash per <uro> child of the entry, holding the run-on's
# entry text, sound file, pronunciation and part of speech. Each value is
# whatever parse_entity returns for the matching selector. Produces an empty
# array when the entry has no <uro> children.
rule :undefined_run_ons do |data, opts|
  data.xpath("uro").map do |uro_node|
    {
      entry: parse_entity(uro_node, "ure"),
      sound: parse_entity(uro_node, "sound wav"),
      pronunciation: parse_entity(uro_node, "pr"),
      part_of_speech: parse_entity(uro_node, "fl")
    }
  end
end

rule_helpers do
def parser_options(opts)
{ api_type: opts[:api_type], response_format: opts[:response_format] }
Expand All @@ -96,7 +110,15 @@ def parse_entity(data, tag)
data.at_css(tag).content if data.at_css(tag)
end

def add_dumy_nodes(nodes)
# Returns the element reported by node.previous_element when it is named
# 'sd' (a sense divider); returns nil when there is no previous element or
# when it has any other name.
def previous_sense_divider(node)
  sibling = node.previous_element
  return nil unless sibling

  sibling.name == 'sd' ? sibling : nil
end

def add_dummy_nodes(nodes)
temp = []
previous_sense_number = nil
nodes.each do |node|
Expand Down
4 changes: 4 additions & 0 deletions spec/fixtures/insouciance_collegiate.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8" ?>
<entry_list version="1.0">
<entry id="insouciance"><ew>insouciance</ew><hw>in*sou*ci*ance</hw><sound><wav>insouc02.wav</wav><wpr>in-!sU-sE-un(t)s</wpr><wav>insouc01.wav</wav><wpr>a~-sUs-y@~s</wpr></sound><pr>in-ˈsü-sē-ən(t)s, aⁿ-süs-ˈyäⁿs</pr><fl>noun</fl><et>French, from <it>in-</it> + <it>soucier</it> to trouble, disturb, from Old French, from Latin <it>sollicitare</it> <ma>solicit</ma></et><def><date>1799</date><dt>:lighthearted unconcern :<sx>nonchalance</sx></dt></def><uro><ure>in*sou*ci*ant</ure><sound><wav>insouc03.wav</wav><wpr>in-!sU-sE-unt</wpr></sound> <pr>in-ˈsü-sē-ənt, aⁿ-süs-yäⁿ</pr> <fl>adjective</fl></uro><uro><ure>in*sou*ci*ant*ly</ure><sound><wav>insouc04.wav</wav><wpr>in-!sU-sE-unt-lE</wpr></sound> <pr>in-ˈsü-sē-ənt-lē</pr> <fl>adverb</fl></uro></entry>
</entry_list>
93 changes: 93 additions & 0 deletions spec/fixtures/scant_collegiate.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
<?xml version="1.0" encoding="utf-8" ?>
<entry_list version="1.0">
<entry id="scant[1]">
<ew>scant</ew>
<subj>DI-1</subj>
<hw hindex="1">scant</hw>
<sound>
<wav>scant001.wav</wav>
<wpr>!skant</wpr>
</sound>
<pr>ˈskant</pr>
<fl>adjective</fl>
<et>Middle English, from Old Norse
<it>skamt,</it>
neuter of
<it>skammr</it>
short</et>
<def>
<date>14th century</date>
<sn>1</sn>
<ssl>dialect</ssl>
<sn>a</sn>
<dt>:excessively frugal</dt>
<sn>b</sn>
<dt>:not prodigal :
<sx>chary</sx></dt>
<sn>2 a</sn>
<dt>:barely or scarcely sufficient</dt>
<sd>especially</sd>
<dt>:not quite coming up to a stated measure
<vi>
a
<it>scant</it>
teaspoon</vi>
</dt>
<sn>b</sn>
<dt>:lacking in amplitude or quantity
<vi>
<it>scant</it>
growth</vi>
</dt>
<sn>3</sn>
<dt>:having a small or insufficient supply
<vi>
he's fat, and
<it>scant</it>
of breath
<aq>Shakespeare</aq></vi>
</dt>
<ss>meager</ss>
</def>
<uro>
<ure>scant*ly</ure>
<fl>adverb</fl>
</uro>
<uro>
<ure>scant*ness</ure>
<fl>noun</fl>
</uro>
</entry>
<entry id="scant[2]">
<ew>scant</ew>
<subj>DI-1</subj>
<hw hindex="2">scant</hw>
<fl>adverb</fl>
<def>
<date>15th century</date>
<sl>dialect</sl>
<dt>:
<sx>scarcely</sx>
<sx>hardly</sx></dt>
</def>
</entry>
<entry id="scant[3]">
<ew>scant</ew>
<hw hindex="3">scant</hw>
<fl>verb</fl>
<def>
<vt>transitive verb</vt>
<date>circa 1580</date>
<sn>1</sn>
<dt>:to provide an incomplete supply of</dt>
<sn>2</sn>
<dt>:to make small, narrow, or meager</dt>
<sn>3</sn>
<dt>:to give scant attention to :
<sx>slight</sx></dt>
<sn>4</sn>
<dt>:to provide with a meager or inadequate portion or supply :
<sx>stint</sx></dt>
</def>
</entry>
</entry_list>
39 changes: 36 additions & 3 deletions spec/lib/mw_dictionary_api/parsers/entry_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ module Parsers
let(:one_collegiate_entry) { one_collegiate_xml_doc.at_css("entry") }

# First <entry> element of each collegiate XML fixture. File.read is used
# instead of File.open(...).read so no file handle is left open once the
# fixture contents have been loaded.
let(:shrift_collegiate_entry) { Nokogiri::XML(File.read(fixture_path('shrift_collegiate.xml'))).at_css("entry") }
let(:scant_collegiate_entry) { Nokogiri::XML(File.read(fixture_path('scant_collegiate.xml'))).at_css("entry") }

let(:insouciance_entry) { Nokogiri::XML(File.read(fixture_path('insouciance_collegiate.xml'))).at_css("entry") }

let(:parser) { EntryParser.new }

Expand Down Expand Up @@ -96,11 +99,41 @@ def parse(data)
end

describe "definitions" do
it 'returns a list of definition pairs' do
definitions = parse(shrift_collegiate_entry)[:definitions]
expect(definitions.count).to eq 4
context "when there's an odd number of sense/definition pairs" do
it 'returns a list of definition pairs' do
definitions = parse(shrift_collegiate_entry)[:definitions]
expect(definitions.count).to eq 4
end
end

context "when there's a mismatched set of sense/definition pairs" do
it 'returns a list of definition pairs' do
definitions = parse(scant_collegiate_entry)[:definitions]
expect(definitions.count).to eq 7
end
end

it 'identifies sense dividers in adjacent definitions' do
definitions = parse(scant_collegiate_entry)[:definitions]
expect(definitions[4][:sense_divider]).to eq 'especially'
end
end

describe "undefined_run_ons" do
  # Run-ons parsed from the one_entry1 fixture and from the insouciance fixture.
  let(:undefined_run_ons) { parse(one_entry1)[:undefined_run_ons] }
  let(:insouciance_uros) { parse(insouciance_entry)[:undefined_run_ons] }

  it "returns a list of run_ons if available" do
    # The one_entry1 entry is expected to yield no run-ons at all.
    expect(undefined_run_ons).to be_empty

    # The insouciance fixture carries two <uro> elements, in this order.
    expected_run_ons = [
      {
        entry: "in*sou*ci*ant",
        sound: "insouc03.wav",
        pronunciation: "in-ˈsü-sē-ənt, aⁿ-süs-yäⁿ",
        part_of_speech: "adjective"
      },
      {
        entry: "in*sou*ci*ant*ly",
        sound: "insouc04.wav",
        pronunciation: "in-ˈsü-sē-ənt-lē",
        part_of_speech: "adverb"
      }
    ]

    expect(insouciance_uros).to eq(expected_run_ons)
  end
end
end
end
end