From ac6772f14ad5fb9be02a0e818b6d3032c85cc2a2 Mon Sep 17 00:00:00 2001 From: Niklaus Giger Date: Wed, 18 Mar 2015 09:25:15 +0100 Subject: [PATCH] Reworked parsing compositions in swissmedic. Take 12 Signed-off-by: Niklaus Giger --- ext/swissindex/test/test_swissindex.rb | 18 ++- jobs/import_swissmedic_only | 17 +- src/plugin/parse_compositions.rb | 80 ++++++++++ src/plugin/swissmedic.rb | 263 ++++++++++++++++--------------- src/util/updater.rb | 1 + test/data/xlsx/Packungen_2014_small.xlsx | Bin 28948 -> 147365 bytes test/test_plugin/swissmedic.rb | 175 ++++++++++++++++---- 7 files changed, 390 insertions(+), 164 deletions(-) create mode 100644 src/plugin/parse_compositions.rb diff --git a/ext/swissindex/test/test_swissindex.rb b/ext/swissindex/test/test_swissindex.rb index f06b776..539e32b 100644 --- a/ext/swissindex/test/test_swissindex.rb +++ b/ext/swissindex/test/test_swissindex.rb @@ -9,7 +9,7 @@ gem 'minitest' require 'minitest/autorun' require 'flexmock' require 'swissindex' - +require 'pry' module ODDB module Swissindex @@ -18,18 +18,25 @@ module ODDB def setup @nonpharma = ODDB::Swissindex::SwissindexNonpharma.new end + def test_download_all + binding.pry + result = @nonpharma.download_all + assert_equal(pharmacode, result[:phar]) + assert(nil != result[:dscr]) + end def test_search_item + binding.pry nonpharma = {:item => {'key' => 'item'}} pharmacode = '6134345' result = @nonpharma.search_item(pharmacode) assert_equal(pharmacode, result[:phar]) assert(nil != result[:dscr]) - end + end if false end class TestSwissindex false, + :iksnrs => [], + :update_compositions => :false, + } + ARGV.each do |arg| + if arg =~ /fix_galenic_form/ + opts[:fix_galenic_form] = true + elsif arg =~ /update_comps/ + opts[:update_compositions] = true + elsif arg =~ /^\d+$/ + opts[:iksnrs] << arg.to_i if arg.size > 0 + end + end + puts "opts == #{opts.inspect}\n" updater = Updater.new system - updater.update_swissmedic + updater.update_swissmedic(opts) end end end diff --git a/src/plugin/parse_compositions.rb b/src/plugin/parse_compositions.rb new file mode 100644 index 0000000..d09e25b --- /dev/null +++ b/src/plugin/parse_compositions.rb @@ -0,0 +1,80 @@ +# encoding: utf-8 + +# This file is shared since oddb2xml 2.0.0 (lib/oddb2xml/parse_compositions.rb) +# with oddb.org src/plugin/parse_compositions.rb +# +# It allows an easy parsing of the column P Zusammensetzung of the swissmedic packages.xlsx file +# + +module ParseUtil + SCALE_P = %r{pro\s+(?(?[\d.,]+)\s*(?[kcmuµn]?[glh]))}u + ParseComposition = Struct.new("ParseComposition", :source, :label, :label_description, :substances, :galenic_form, :route_of_administration) + ParseSubstance = Struct.new("ParseSubstance", :name, :qty, :unit, :chemical_substance, :chemical_dose) + def ParseUtil.capitalize(string) + string.split(/\s+/u).collect { |word| word.capitalize }.join(' ') + end + + def ParseUtil.parse_compositions(composition) + rep_1 = '----'; to_1 = '(' + rep_2 = '-----'; to_2 = ')' + rep_3 = '------'; to_3 = ',' + + comps = [] + label_pattern = /^(?