From 6705a1aa3777965afa4a36f8f1ad5ea20a820da6 Mon Sep 17 00:00:00 2001 From: Niklaus Giger Date: Wed, 11 Mar 2015 17:32:36 +0100 Subject: [PATCH] Emit all components mentioned in column_M Signed-off-by: Niklaus Giger --- lib/oddb2xml/calc.rb | 80 ++++++++++++++++++++++++-------------------- spec/calc_spec.rb | 93 +++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 111 insertions(+), 62 deletions(-) diff --git a/lib/oddb2xml/calc.rb b/lib/oddb2xml/calc.rb index 93939a9..4ae6526 100644 --- a/lib/oddb2xml/calc.rb +++ b/lib/oddb2xml/calc.rb @@ -92,7 +92,7 @@ module Oddb2xml 'ovale Körper', 'tube(s)', ] - Mesurements = [ 'g', 'kg', 'l', 'mg', 'ml', 'cm', 'GBq'] + Measurements = [ 'g', 'kg', 'l', 'mg', 'ml', 'cm', 'GBq'] Others = ['Kombipackung', 'emballage combiné' ] UnknownGalenicForm = 140 UnknownGalenicGroup = 1 @@ -153,35 +153,6 @@ module Oddb2xml end public SCALE_P = %r{pro\s+(?(?[\d.,]+)\s*(?[kcmuµn]?[glh]))}u - def self.update_active_agent(name, part) - units = 'U\.\s*Ph\.\s*Eur\.' - ptrn = %r{(?ix) - (^|[[:punct:]]|\bet|\bex)\s*#{Regexp.escape name}(?![:\-]) - (\s*(?[\d\-.]+(\s*(?:(Mio\.?\s*)?(#{units}|[^\s,]+)) - (\s*[mv]/[mv])?)))? - (\s*(?:ut|corresp\.?)\s+(?[^\d,]+) - \s*(?[\d\-.]+(\s*(?:(Mio\.?\s*)?(#{units}|[^\s,]+)) - (\s*[mv]/[mv])?))?)? - }u - if(match = ptrn.match(part.sub(/\.$/, ''))) - dose = match[:dose].split(/\b\s*(?![.,\d\-]|Mio\.?)/u, 2) if match[:dose] - cdose = match[:cdose].split(/\b\s*(?![.,\d\-]|Mio\.?)/u, 2) if match[:cdose] - if dose && (scale = SCALE_P.match(part)) && !dose[1].include?('/') - unit = dose[1] << '/' - num = scale[:qty].to_f - if num <= 1 - unit << scale[:unit] - else - unit << scale[:scale] - end - end - if(chemical = match[:chemical]) - chemical = capitalize(chemical) - chemical = nil if chemical.empty? - end if false # TODO: - [name, dose ? [dose[0], dose[1]] : [nil, nil] ].flatten - end - end private def remove_duplicated_spaces(string) string ? string.to_s.gsub(/\s\s+/, ' ') : nil @@ -229,11 +200,50 @@ public end agents = [] comps = [] + units = 'U\.\s*Ph\.\s*Eur\.' + name = 'dummy' + ptrn = %r{(?ix) + (^|[[:punct:]]|\bet|\bex)\s*#{Regexp.escape name}(?![:\-]) + (\s*(?[\d\-.]+(\s*(?:(Mio\.?\s*)?(#{units}|[^\s,]+)) + (\s*[mv]/[mv])?)))? + (\s*(?:ut|corresp\.?)\s+(?[^\d,]+) + \s*(?[\d\-.]+(\s*(?:(Mio\.?\s*)?(#{units}|[^\s,]+)) + (\s*[mv]/[mv])?))?)? + }u compositions.each_with_index do |composition, idx| composition.gsub!(/'/, '') - @active_substances.each { |name| - name, qty, unit = Calc.update_active_agent(name, composition) - res << Composition.new(name, qty.to_f, unit, labels[idx] ? labels[idx].join('') : nil) if name + label = nil + composition_text.split(/\n/u).each { + |line| + if m = /^(?A|I|B|II|C|III|D|IV|E|V|F|VI)\)\s+(?[^\s:, ]+):/.match(line) + label = "#{m[:part_id]} #{m[:part_name]}" + end + filler = line.split(',')[-1].sub(/\.$/, '') + filler_match = /^(?[^,\d]+)\s*(?[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(filler) + components = line.split(',').each { + |component| + to_consider = component.strip.split(':')[-1] # remove label + m = /^(?[^,\d]+)\s*(?[\d\-.]+(\s*(?:(Mio\.?\s*)?(U\.\s*Ph\.\s*Eur\.|[^\s,]+))))/.match(to_consider) + if m + dose = nil + unit = nil + name = m[:name].split(/\s/).collect{ |x| x.capitalize } + dose = m[:dose].split(/\b\s*(?![.,\d\-]|Mio\.?)/u, 2) if m[:dose] + if dose && (scale = SCALE_P.match(filler)) && !dose[1].include?('/') + unit = dose[1] << '/' + num = scale[:qty].to_f + if num <= 1 + unit << scale[:unit] + else + unit << scale[:scale] + end + elsif dose.size == 2 + unit = dose[1] + end + to_add = Composition.new(name.join(' ').strip, dose ? dose[0].to_f : nil, unit, label) + res << to_add + end + } } end @compositions = res @@ -276,7 +286,7 @@ public begin return pkg_size_to_int(pkg_size_L) unless part_from_name_C part_from_name_C = part_from_name_C.gsub(/[()]/, '_') - Mesurements.each{ |x| + Measurements.each{ |x| if einheit_M and /^#{x}$/i.match(einheit_M) puts "measurement in einheit_M #{einheit_M} matched: #{x}" if $VERBOSE update_rule('measurement einheit_M') @@ -317,7 +327,7 @@ public return pkg_size_to_int(pkg_size_L, true) end } - Mesurements.each{ |x| + Measurements.each{ |x| if pkg_size_L and pkg_size_L.split(' ').index(x) puts "measurement in pkg_size_L #{pkg_size_L} matched: #{x}" if $VERBOSE update_rule('measurement pkg_size_L') diff --git a/spec/calc_spec.rb b/spec/calc_spec.rb index 7bc58ae..fd0fcd3 100644 --- a/spec/calc_spec.rb +++ b/spec/calc_spec.rb @@ -285,7 +285,6 @@ Corresp. 5300 kJ.", puts "Testing key #{key.inspect} #{value.inspect} against #{result} seems to fail" unless result == value.to_s result.should eq value.to_s } - XPath.match( doc, "//ARTICLE[GTIN='7680006790124']/COMPOSITIONS/COMPONENT/NAME").last.text.should eq 'Bifidobacterium Infantis' XPath.match( doc, "//ARTICLE[GTIN='7680545250363']/COMPOSITIONS/COMPONENT/NAME").last.text.should eq 'Alprostadilum' XPath.match( doc, "//ARTICLE[GTIN='7680458820202']/NAME").last.text.should eq 'Magnesiumchlorid 0,5 molar B. Braun, Zusatzampulle für Infusionslösungen' end @@ -322,16 +321,6 @@ Corresp. 5300 kJ.", # specify { expect(info.galenic_form.description).to eq "Infusionsemulsion" } end - context 'find correct result compositions' do - result = Calc.new(nil, nil, nil, 'rutosidum trihydricum, aescinum', 'rutosidum trihydricum 20 mg, aescinum 25 mg, aromatica, excipiens pro compresso.') - specify { expect(result.compositions.first.name).to eq 'Rutosidum Trihydricum' } - specify { expect(result.compositions.first.qty).to eq 20} - specify { expect(result.compositions.first.unit).to eq 'mg'} - specify { expect(result.compositions[1].name).to eq 'Aescinum' } - specify { expect(result.compositions[1].qty).to eq 25} - specify { expect(result.compositions[1].unit).to eq 'mg'} - end - context 'should handle CFU' do result = Calc.new(nil, nil, nil, 'lactobacillus acidophilus cryodesiccatus, bifidobacterium infantis', 'lactobacillus acidophilus cryodesiccatus min. 10^9 CFU, bifidobacterium infantis min. 10^9 CFU, color.: E 127, E 132, E 104, excipiens pro capsula.') @@ -339,26 +328,76 @@ Corresp. 5300 kJ.", end context 'find correct result compositions' do - result = Calc.new('Nutriflex Lipid peri, Infusionsemulsion, 1250ml', nil, nil, - 'glucosum anhydricum, zinci acetas dihydricus, isoleucinum, leucinum, lysinum anhydricum, methioninum, phenylalaninum, threoninum, tryptophanum, valinum, argininum, histidinum, alaninum, acidum asparticum, acidum glutamicum, glycinum, prolinum, serinum, magnesii acetas tetrahydricus, chloridum, phosphas, acetas, sojae oleum, triglycerida saturata media', - 'I) Glucoselösung: glucosum anhydricum 80 g ut glucosum monohydricum, natrii dihydrogenophosphas dihydricus 1.17 g, zinci acetas dihydricus 6.625 mg, acidum citricum q.s. ad pH, aqua ad iniectabilia q.s. ad solutionem pro 500 ml. + text = 'I) Glucoselösung: glucosum anhydricum 80 g ut glucosum monohydricum, natrii dihydrogenophosphas dihydricus 1.17 g glycerolum, zinci acetas dihydricus 6.625 mg, natrii oleas, aqua q.s. ad emulsionem pro 250 ml. II) Fettemulsion: sojae oleum 25 g, triglycerida saturata media 25 g, lecithinum ex ovo 3 g, glycerolum, natrii oleas, aqua q.s. ad emulsionem pro 250 ml. -III) Aminosäurenlösung: isoleucinum 2.34 g, leucinum 3.13 g, lysinum anhydricum 2.26 g ut lysini hydrochloridum, methioninum 1.96 g, phenylalaninum 3.51 g, threoninum 1.82 g, tryptophanum 0.57 g, valinum 2.6 g, argininum 2.7 g, histidinum 1.25 g ut histidini hydrochloridum monohydricum, alaninum 4.85 g, acidum asparticum 1.5 g, acidum glutamicum 3.5 g, glycinum 1.65 g, prolinum 3.4 g, serinum 3 g, natrii hydroxidum 0.8 g, natrii chloridum 1.081 g, natrii acetas trihydricus 0.544 g, kalii acetas 2.943 g, magnesii acetas tetrahydricus 0.644 g, calcii chloridum dihydricum 0.441 g, aqua ad iniectabilia q.s. ad solutionem pro 500 ml. +III) Aminosäurenlösung: isoleucinum 2.34 g, leucinum 3.13 g, lysinum anhydricum 2.26 g ut lysini hydrochloridum, methioninum 1.96 g, aqua ad iniectabilia q.s. ad solutionem pro 400 ml. . -I) et II) et III) corresp.: aminoacida 32 g/l, carbohydrata 64 g/l, materia crassa 40 g/l, natrium 40 mmol/l, kalium 24 mmol/l, calcium 2.4 mmol/l, magnesium 2.4 mmol, zincum 0.024 mmol/l, chloridum 38.4 mmol/l, phosphas 6 mmol/l, acetas 32 mmol/l, acidum citricum monohydricum, in emulsione recenter mixta 1250 ml. -Corresp. 4000 kJ.') +I) et II) et III) corresp.: aminoacida 32 g/l, acetas 32 mmol/l, acidum citricum monohydricum, in emulsione recenter mixta 1250 ml. +Corresp. 4000 kJ.' + result = Calc.new('Nutriflex Lipid peri, Infusionsemulsion, 1250ml', nil, nil, + 'glucosum anhydricum, zinci acetas dihydricus, isoleucinum, leucinum', + text + ) specify { expect(result.compositions.first.name).to eq 'Glucosum Anhydricum' } specify { expect(result.compositions.first.qty).to eq 80.0} - specify { expect(result.compositions.first.unit).to eq 'g/500 ml'} - zinci = result.compositions.find{ |x| x.name == 'Zinci Acetas Dihydricus' } - specify { expect(zinci.name).to eq 'Zinci Acetas Dihydricus' } - specify { expect(zinci.qty).to eq 6.625} - specify { expect(zinci.unit).to eq 'mg/500 ml'} - zinci = result.compositions.find{ |x| x.name == 'Zinci Acetas Dihydricus' } - natrii = result.compositions.find{ |x| x.name == 'Natrii Dihydrogenophosphas Dihydricus' } - specify { expect(zinci).not_to eq nil} - skip 'Is Natrii Dihydrogenophosphas Dihydricus a real error or not?' - # specify { expect(natrii).not_to eq nil } + specify { expect(result.compositions.first.unit).to eq 'g/250 ml'} + specify { expect(result.compositions.first.label).to eq 'I Glucoselösung' } + + # from II) + lecithinum = result.compositions.find{ |x| x.name.match(/lecithinum/i) } + specify { expect(lecithinum).not_to eq nil} + if lecithinum + specify { expect(lecithinum.name).to eq 'Lecithinum Ex Ovo' } + specify { expect(lecithinum.qty).to eq 3.0} + specify { expect(lecithinum.unit).to eq 'g/250 ml'} + specify { expect(lecithinum.label).to eq 'II Fettemulsion' } + end + + # From III + leucinum = result.compositions.find{ |x| x.name.eql?('Leucinum') and x.label.match(/^III /) } + specify { expect(leucinum).not_to eq nil} + if leucinum + specify { expect(leucinum.name).to eq 'Leucinum' } + specify { expect(leucinum.qty).to eq 3.13} + specify { expect(leucinum.unit).to eq 'g/400 ml'} + specify { expect(leucinum.label).to eq 'III Aminosäurenlösung' } + end + leucinum_I = result.compositions.find{ |x| x.name.eql?('Leucinum') and x.label.match(/^I /) } + specify { expect(leucinum_I).to eq nil} + leucinum_II = result.compositions.find{ |x| x.name.eql?('Leucinum') and x.label.match(/^II /) } + specify { expect(leucinum_II).to eq nil} + end + + context 'find correct result compositions' do + result = Calc.new(nil, nil, nil, 'rutosidum trihydricum, aescinum', 'rutosidum trihydricum 20 mg, aescinum 25 mg, aromatica, excipiens pro compresso.') + specify { expect(result.compositions.first.name).to eq 'Rutosidum Trihydricum' } + specify { expect(result.compositions.first.qty).to eq 20} + specify { expect(result.compositions.first.unit).to eq 'mg'} + specify { expect(result.compositions[1].name).to eq 'Aescinum' } + specify { expect(result.compositions[1].qty).to eq 25} + specify { expect(result.compositions[1].unit).to eq 'mg'} + end + context 'find correct result for Inflora, capsule' do + info = Calc.new(tst_infloran.name_C, tst_infloran.package_size_L, tst_infloran.einheit_M, tst_infloran.active_substance_0, tst_infloran.composition_P) + specify { expect(tst_infloran.url).to eq 'http://ch.oddb.org/de/gcc/drug/reg/00679/seq/02/pack/012' } + specify { expect(info.galenic_form.description).to eq 'capsule' } + skip { expect(info.galenic_group.description).to eq 'Injektion/Infusion' } + specify { expect(info.pkg_size).to eq '2x10' } + specify { expect(info.selling_units).to eq 20 } + skip { expect(info.measure).to eq '0' } + bifidobacterium = info.compositions.find{ |x| x.name.match(/Bifidobacterium/i) } + specify { expect(bifidobacterium).not_to eq nil} + if bifidobacterium + specify { expect(bifidobacterium.name).to eq 'Bifidobacterium Infantis Min.' } + skip { expect(bifidobacterium.qty).to eq '10^9'} + skip { expect(bifidobacterium.unit).to eq 'CFU'} + end + e_127 = info.compositions.find{ |x| x.name.match(/E 127/i) } + skip { expect(e_127).not_to eq nil} + if e_127 + specify { expect(e_127.name).to eq 'E 127' } + specify { expect(e_127.unit).to eq ''} + end end end -- 2.1.4