From 7f56c556a9e4cffe356240af14c95194f9e67032 Mon Sep 17 00:00:00 2001 From: Niklaus Giger Date: Mon, 28 Apr 2014 15:34:34 +0200 Subject: [PATCH] Using sax-machine for Preparation.xml Signed-off-by: Niklaus Giger --- Gemfile.lock | 3 + lib/oddb2xml/extractor.rb | 250 +++++++++++++++++++++++++++++++++++++--------- oddb2xml.gemspec | 3 +- spec/builder_spec.rb | 3 - 4 files changed, 208 insertions(+), 51 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 531a1c9..4ba7cd5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -8,6 +8,7 @@ PATH rubyXL (~> 2.5) rubyzip (~> 1.0) savon (~> 2.0) + sax-machine spreadsheet GEM @@ -87,6 +88,8 @@ GEM nokogiri (>= 1.4.0) nori (~> 2.3.0) wasabi (~> 3.2.2) + sax-machine (0.1.0) + nokogiri (> 0.0.0) slop (3.5.0) spreadsheet (0.9.7) ruby-ole (>= 1.0) diff --git a/lib/oddb2xml/extractor.rb b/lib/oddb2xml/extractor.rb index b57e8fb..38d69f2 100644 --- a/lib/oddb2xml/extractor.rb +++ b/lib/oddb2xml/extractor.rb @@ -4,6 +4,7 @@ require 'nokogiri' require 'spreadsheet' require 'stringio' require 'rubyXL' +require 'sax-machine' module Oddb2xml module TxtExtractorMethods @@ -39,65 +40,220 @@ module Oddb2xml class LppvExtractor < Extractor include TxtExtractorMethods end + + class PriceElement + include SAXMachine + element :Price + element :ValidFromDate + element :DivisionDescription + element :PriceTypeCode + element :PriceTypeDescriptionDe + element :PriceTypeDescriptionFr + element :PriceTypeDescriptionIt + element :PriceChangeTypeDescriptionDe + element :PriceChangeTypeDescriptionFr + element :PriceChangeTypeDescriptionIt + end + + class PricesElement + include SAXMachine + element :ExFactoryPrice, :class => PriceElement + element :PublicPrice, :class => PriceElement + end + + class StatusElement + include SAXMachine + element :IntegrationDate + element :ValidFromDate + element :ValidThruDate + element :StatusTypeCodeSl + element :StatusTypeDescriptionSl + element :FlagApd + end + + class LimitationElement + include SAXMachine + element :LimitationCode + element :LimitationType + element :LimitationValue + element :LimitationNiveau + element :DescriptionDe + element :DescriptionFr + element :DescriptionIt + element :ValidFromDate + element :ValidThruDate + end + + class LimitationsElement + include SAXMachine + element :Limitation, :class => LimitationElement + end + + class PointLimitationsElement + include SAXMachine + element :PointLimitation, :class => LimitationElement + end + + class PackContent + include SAXMachine + # + attribute :ProductKey + attribute :Pharmacode + attribute :PackId + element :DescriptionDe + element :DescriptionFr + element :DescriptionIt + element :SwissmedicCategory + element :SwissmedicNo8 + element :FlagNarcosis + element :FlagModal + element :BagDossierNo + element :GTIN + element :Limitations + element :PointLimitations, :class => PointLimitationsElement + element :Prices, :class => PricesElement + end + + class PacksElement + include SAXMachine + elements :Pack, :class => PackContent + end + + class ItCodeContent + include SAXMachine + attribute :Code + element :DescriptionDe + element :DescriptionFr + element :DescriptionIt + elements :Limitations, :class => LimitationElement + end + + class ItCodeEntry + include SAXMachine + element :ItCode, :class => ItCodeContent + end + + # handling attributes as suggested by https://github.com/pauldix/sax-machine/issues/30 + class ItCodesElement + include SAXMachine + elements :ItCode, :class => ItCodeContent + end + + class SubstanceElement + include SAXMachine + element :DescriptionLa + element :Quantity + element :QuantityUnit + end + + class SubstancesElement + include SAXMachine + elements :Substance, :class => SubstanceElement + end + + class PreparationContent + # attr_reader :ProductCommercial + include SAXMachine + attribute :ProductCommercial + element :NameFr + element :NameDe + element :NameIt + element :Status, :class => StatusElement + element :Dummy + element :DescriptionDe + element :DescriptionFr + element :DescriptionIt + element :AtcCode + element :SwissmedicNo5 + element :FlagItLimitation + element :OrgGenCode + element :FlagSB20 + element :CommentDe + element :CommentFr + element :CommentIt + element :VatInEXF + element :PointLimitations, :class => LimitationElement + element :Limitations, :class => LimitationsElement + element :Substances, :class => SubstancesElement + element :Packs, :class => PacksElement + element :ItCodes, :class => ItCodesElement + end + + class PreparationEntry + include SAXMachine + element :Preparation, :class => PreparationContent + end + + class PreparationsContent + include SAXMachine + attribute :ReleaseDate + elements :Preparation, :class => PreparationContent + end + + class PreparationsEntry + include SAXMachine + element :Preparations, :class => PreparationsContent + end + class BagXmlExtractor < Extractor def to_hash data = {} - doc = Nokogiri::XML(@xml) - doc.xpath('//Preparation').each do |seq| + result = PreparationsEntry.parse(@xml) + result.Preparations.Preparation.each do |seq| item = {} - item[:product_key] = seq.attr('ProductCommercial').to_s - item[:desc_de] = (desc = seq.at_xpath('.//DescriptionDe')) ? desc.text : '' - item[:desc_fr] = (desc = seq.at_xpath('.//DescriptionFr')) ? desc.text : '' - item[:name_de] = (name = seq.at_xpath('.//NameDe')) ? name.text : '' - item[:name_fr] = (name = seq.at_xpath('.//NameFr')) ? name.text : '' - item[:swissmedic_number5] = (num5 = seq.at_xpath('.//SwissmedicNo5')) ? (num5.text.rjust(5,'0')) : '' - item[:org_gen_code] = (orgc = seq.at_xpath('.//OrgGenCode')) ? orgc.text : '' - item[:deductible] = (ddbl = seq.at_xpath('.//FlagSB20')) ? ddbl.text : '' - item[:atc_code] = (atcc = seq.at_xpath('.//AtcCode')) ? atcc.text : '' - item[:comment_de] = (info = seq.at_xpath('.//CommentDe')) ? info.text : '' - item[:comment_fr] = (info = seq.at_xpath('.//CommentFr')) ? info.text : '' + item[:product_key] = seq.ProductCommercial + item[:desc_de] = (desc = seq.DescriptionDe) ? desc : '' + item[:desc_fr] = (desc = seq.DescriptionFr) ? desc : '' + item[:name_de] = (name = seq.NameDe) ? name : '' + item[:name_fr] = (name = seq.NameFr) ? name : '' + item[:swissmedic_number5] = (num5 = seq.SwissmedicNo5) ? (num5.rjust(5,'0')) : '' + item[:org_gen_code] = (orgc = seq.OrgGenCode) ? orgc : '' + item[:deductible] = (ddbl = seq.FlagSB20) ? ddbl : '' + item[:atc_code] = (atcc = seq.AtcCode) ? atcc : '' + item[:comment_de] = (info = seq.CommentDe) ? info : '' + item[:comment_fr] = (info = seq.CommentFr) ? info : '' item[:it_code] = '' - seq.xpath('.//ItCode').each do |itc| + seq.ItCodes.ItCode.each do |itc| if item[:it_code].to_s.empty? - it_code = itc.attr('Code').to_s + it_code = itc.Code.to_s item[:it_code] = (it_code =~ /(\d+)\.(\d+)\.(\d+)./) ? it_code : '' end end item[:substances] = [] - seq.xpath('.//Substance').each_with_index do |sub, i| + seq.Substances.Substance.each_with_index do |sub, i| item[:substances] << { :index => i.to_s, - :name => (name = sub.at_xpath('.//DescriptionLa')) ? name.text : '', - :quantity => (qtty = sub.at_xpath('.//Quantity')) ? qtty.text : '', - :unit => (unit = sub.at_xpath('.//QuantityUnit')) ? unit.text : '', + :name => (name = sub.DescriptionLa) ? name : '', + :quantity => (qtty = sub.Quantity) ? qtty : '', + :unit => (unit = sub.QuantityUnit) ? unit : '', } end item[:pharmacodes] = [] item[:packages] = {} # pharmacode => package - seq.xpath('.//Pack').each do |pac| - phar = pac.attr('Pharmacode') + seq.Packs.Pack.each do |pac| + phar = pac.Pharmacode phar = correct_code(phar.to_s, 7) - ean = pac.at_xpath('.//GTIN') - search_key = phar.to_i != 0 ? phar : ean.text + ean = pac.GTIN + search_key = phar.to_i != 0 ? phar : ean # as common key with swissINDEX item[:pharmacodes] << phar # packages item[:packages][search_key] = { :pharmacode => phar, - :ean => (ean) ? ean.text : '', - :swissmedic_category => (cat = pac.at_xpath('.//SwissmedicCategory')) ? cat.text : '', - :swissmedic_number8 => (num = pac.at_xpath('.//SwissmedicNo8')) ? num.text.rjust(8, '0') : '', - :narcosis_flag => (flg = pac.at_xpath('.//FlagNarcosis')) ? flg.text : '', + :ean => (ean) ? ean : '', + :swissmedic_category => (cat = pac.SwissmedicCategory) ? cat : '', + :swissmedic_number8 => (num = pac.SwissmedicNo8) ? num.rjust(8, '0') : '', + :narcosis_flag => (flg = pac.FlagNarcosis) ? flg : '', :prices => { :exf_price => { - :price => (exf = pac.at_xpath('.//ExFactoryPrice/Price')) ? exf.text : '', - :valid_date => (exf = pac.at_xpath('.//ExFactoryPrice/ValidFromDate')) ? exf.text : '', - :price_code => (exf = pac.at_xpath('.//ExFactoryPrice/PriceTypeCode')) ? exf.text : '', + :price => (exf = pac.Prices.ExFactoryPrice.Price) ? exf : '', + :valid_date => (exf = pac.Prices.ExFactoryPrice.ValidFromDate) ? exf : '', + :price_code => (exf = pac.Prices.ExFactoryPrice.PriceTypeCode) ? exf : '', }, :pub_price => { - :price => (pub = pac.at_xpath('.//PublicPrice/Price')) ? pub.text : '', - :valid_date => (pub = pac.at_xpath('.//PublicPrice/ValidFromDate')) ? pub.text : '', - :price_code => (pub = pac.at_xpath('.//PublicPrice/PriceTypeCode')) ? pub.text : '', + :price => (pub = pac.Prices.PublicPrice.Price) ? pub : '', + :valid_date => (pub = pac.Prices.PublicPrice.ValidFromDate) ? pub : '', + :price_code => (pub = pac.Prices.PublicPrice.PriceTypeCode) ? pub : '', } } } @@ -105,11 +261,11 @@ module Oddb2xml item[:packages][search_key][:limitations] = [] limitations = Hash.new{|h,k| h[k] = [] } # in seq - limitations[:seq] = (lims = seq.xpath('.//Limitations/Limitation')) ? lims.to_a : nil + limitations[:seq] = (lims = seq.Limitations.Limitation) ? lims.to_a : nil # in it-codes - limitations[:itc] = (lims = seq.xpath('.//ItCodes/ItCode/Limitations/Limitation')) ? lims.to_a : nil + limitations[:itc] = (lims = seq.ItCodes.ItCode.first.Limitations) ? lims.to_a : nil # in pac - limitations[:pac] = (lims = pac.xpath('.//Limitations/Limitation')) ? lims.to_a : nil + limitations[:pac] = (lims = pac.Limitations) ? lims.to_a : nil limitations.each_pair do |lim_key, lims| key = '' id = '' @@ -130,16 +286,16 @@ module Oddb2xml :it => item[:it_code], :key => key, :id => id, - :code => (lic = lim.at_xpath('.//LimitationCode')) ? lic.text : '', - :type => (lit = lim.at_xpath('.//LimitationType')) ? lit.text : '', - :value => (liv = lim.at_xpath('.//LimitationValue')) ? liv.text : '', - :niv => (niv = lim.at_xpath('.//LimitationNiveau')) ? niv.text : '', - :desc_de => (dsc = lim.at_xpath('.//DescriptionDe')) ? dsc.text : '', - :desc_fr => (dsc = lim.at_xpath('.//DescriptionFr')) ? dsc.text : '', - :vdate => (dat = lim.at_xpath('.//ValidFromDate')) ? dat.text : '', + :code => (lic = lim.LimitationCode) ? lic : '', + :type => (lit = lim.LimitationType) ? lit : '', + :value => (liv = lim.LimitationValue) ? liv : '', + :niv => (niv = lim.LimitationNiveau) ? niv : '', + :desc_de => (dsc = lim.DescriptionDe) ? dsc : '', + :desc_fr => (dsc = lim.DescriptionFr) ? dsc : '', + :vdate => (dat = lim.ValidFromDate) ? dat : '', } deleted = false - if upto = ((thr = lim.at_xpath('.//ValidThruDate')) ? thr.text : nil) and + if upto = ((thr = lim.ValidThruDate) ? thr : nil) and upto =~ /\d{2}\.\d{2}\.\d{2}/ begin deleted = true if Date.strptime(upto, '%d.%m.%y') >= Date.today @@ -148,11 +304,11 @@ module Oddb2xml end limitation[:del] = deleted item[:packages][search_key][:limitations] << limitation - end + end if lims end # limitation points - pts = pac.at_xpath('.//PointLimitations/PointLimitation/Points') # only first points - item[:packages][search_key][:limitation_points] = pts ? pts.text : '' + pts = pac.PointLimitations.PointLimitation # only first points + item[:packages][search_key][:limitation_points] = pts ? pts : '' # pharmacode => seq (same data) data[search_key] = item end diff --git a/oddb2xml.gemspec b/oddb2xml.gemspec index 2a489ee..0ab3099 100644 --- a/oddb2xml.gemspec +++ b/oddb2xml.gemspec @@ -24,7 +24,8 @@ Gem::Specification.new do |spec| spec.add_dependency 'savon', '~> 2.0' spec.add_dependency 'spreadsheet' spec.add_dependency 'rubyXL', '~> 2.5' - + spec.add_dependency 'sax-machine' # , '0.0.10' + # spec.add_dependency 'sax-machine', :git => "https://github.com/gregwebs/sax-machine.git" spec.add_development_dependency "bundler" spec.add_development_dependency "rake" diff --git a/spec/builder_spec.rb b/spec/builder_spec.rb index 0d8d3de..b929213 100644 --- a/spec/builder_spec.rb +++ b/spec/builder_spec.rb @@ -29,7 +29,6 @@ describe Oddb2xml::Builder do setup_server_mocks setup_swiss_index_server_mock(types = ['NonPharma', 'Pharma']) end - if true context 'should handle BAG-articles with and without pharmacode' do it { dat = File.read(File.expand_path('../data/Preparation.xml', __FILE__)) @@ -100,8 +99,6 @@ describe Oddb2xml::Builder do # oddb_dat.should match(/001349002780100B010710076806206900842/), "should match EAN of Desitin" end it "pending should match EAN of Desitin. returns 0 at the moment" - - end end context 'when option -e is given' do let(:cli) do -- 1.9.1