require 'strscan'

module ODDB
  module Util
    # Lookup data for accented Latin letters.
    module IsoLatin1
      # Maps each accented upper-case letter to the lower-case form listed
      # next to it.
      DOWNCASE_PAIRS = {
        "Å" => "å", "Æ" => "æ", "Ä" => "ä", "Á" => "á", "Â" => "â", "À" => "à",
        "Ã" => "ã", "Ą" => "ą", "Ǎ" => "ǎ", "Ă" => "ă", "Ā" => "ā", "Ȧ" => "ȧ",
        "Ḃ" => "ḃ", "Ç" => "ç", "Ĉ" => "ĉ", "Č" => "č", "Ć" => "ć", "Ċ" => "ċ",
        "Ḑ" => "ḑ", "Đ" => "đ", "Ð" => "ð", "Ď" => "ď", "Ḋ" => "ḋ", "Ë" => "ë",
        "É" => "é", "Ê" => "ê", "È" => "è", "Ȩ" => "ȩ", "Ę" => "ę", "Ě" => "ě",
        "Ĕ" => "ĕ", "Ẽ" => "ẽ", "Ē" => "ē", "Ė" => "ė", "Þ" => "þ", "Ḟ" => "ḟ",
        "Ģ" => "ģ", "Ǧ" => "ǧ", "Ğ" => "ğ", "Ǵ" => "ǵ", "Ĝ" => "ĝ", "Ḡ" => "ḡ",
        "Ġ" => "ġ", "Ȟ" => "ȟ", "Ĥ" => "ĥ", "Ḧ" => "ḧ", "Ḩ" => "ḩ", "Ḣ" => "ḣ",
        "Ï" => "ï", "Í" => "í", "Î" => "î", "Ì" => "ì", "Į" => "į", "Ǐ" => "ǐ",
        "Ĭ" => "ĭ", "Ĩ" => "ĩ", "İ" => "ı", "Ĵ" => "ĵ", "Ǩ" => "ǩ", "Ḱ" => "ḱ",
        "Ķ" => "ķ", "Ł" => "ł", "Ĺ" => "ĺ", "Ľ" => "ľ", "Ļ" => "ļ", "Ḿ" => "ḿ",
        "Ṁ" => "ṁ", "Ň" => "ň", "Ń" => "ń", "Ñ" => "ñ", "Ǹ" => "ǹ", "Ņ" => "ņ",
        "Ṅ" => "ṅ", "Œ" => "œ", "Ö" => "ö", "Ó" => "ó", "Ô" => "ô", "Ò" => "ò",
        "Õ" => "õ", "Ō" => "ō", "Ŏ" => "ŏ", "Ø" => "ø", "Ǫ" => "ǫ", "Ǒ" => "ǒ",
        "Ȯ" => "ȯ", "Ṕ" => "ṕ", "Ṗ" => "ṗ", "Ř" => "ř", "Ŕ" => "ŕ", "Ŗ" => "ŗ",
        "Ṙ" => "ṙ", "Ś" => "ś", "Ŝ" => "ŝ", "Š" => "š", "Ş" => "ş", "Ṡ" => "ṡ",
        "Ť" => "ť", "Ţ" => "ţ", "Ṫ" => "ṫ", "Ü" => "ü", "Ú" => "ú", "Û" => "û",
        "Ù" => "ù", "Ų" => "ų", "Ǘ" => "ǘ", "Ǔ" => "ǔ", "Ǚ" => "ǚ", "Ǜ" => "ǜ",
        "Ũ" => "ũ", "Ŭ" => "ŭ", "Ů" => "ů", "Ǖ" => "ǖ", "Ṽ" => "ṽ", "Ẃ" => "ẃ",
        "Ŵ" => "ŵ", "Ẁ" => "ẁ", "Ẅ" => "ẅ", "Ẇ" => "ẇ", "Ẍ" => "ẍ", "Ẋ" => "ẋ",
        "Ÿ" => "ÿ", "Ẏ" => "ẏ", "Ỹ" => "ỹ", "Ỳ" => "ỳ", "Ŷ" => "ŷ", "Ý" => "ý",
        "Ȳ" => "ȳ", "Ž" => "ž", "Ź" => "ź", "Ẑ" => "ẑ", "Ż" => "ż"
      }.freeze

      # Character class matching any single key of DOWNCASE_PAIRS.
      DOWNCASE_PTRN = /[#{DOWNCASE_PAIRS.keys.join}]/u
    end
  end
end

# Returns the [numeric, unit] pair of a parsed measure triple
# ([numeric, unit1, unit2]), or [1, nil] when no measure was parsed.
# Wrapping the pair in a Dose object is currently disabled, so the raw
# values are returned.
def dose_from_measure(measure)
  measure ? measure[0, 2] : [1, nil]
end

# Returns the [numeric, unit] pair of a parsed scale triple
# ([slash, numeric, unit]), or [1, nil] when no scale was parsed.
# Wrapping the pair in a Dose object is currently disabled, so the raw
# values are returned.
def dose_from_scale(scale)
  scale ? scale[1, 2] : [1, nil]
end

# Multiplies the numeric parts of all parsed multiplier triples
# ([numeric, unit, set]).
#
# Returns Integer 1 for nil or an empty list, otherwise the Float product
# of every node's first element. The unit element is ignored.
def dose_from_multi(multi)
  return 1 if multi.nil?

  multi.inject(1) { |product, node| product * node[0].to_f }
end

# Parses a free-text package size description (e.g. "10 x 200 ml").
#
# size - the String to parse.
#
# Returns a six element Array:
#   [addition, multi, count, measure, scale, comform]
# where
#   addition - Integer in front of a "+", 0 if there is none
#   multi    - product of all "<n> x" style multipliers (see dose_from_multi)
#   count    - Integer of the first bare number found, 1 if there is none
#   measure  - [numeric, unit] of the first "<n> <unit>" found, else [1, nil]
#   scale    - [numeric, unit] following "/" or "pro", else [1, nil]
#   comform  - concatenated descriptive words, nil if there are none
#
# NOTE(review): numbers are converted with to_i / to_f, so strings such as
# "1'000" or "2,2" are truncated at the separator even though the numeric
# pattern accepts them - confirm whether that is intended.
def _parse_size(size)
  unit_pattern = /(([kmµucMG]?([glLJm]|mol|Bq))(\/([mµu]?[glL]))?)|(Mio\.?\s)?((U\.?I\.?)|(I\.E\.))|(%( [mV]\/[mV])?)|(I\.E\.)|(Fl\.)/
  numeric_pattern = /\d+(\'\d+)*([.,]\d*)?/
  isolatin1 = ODDB::Util::IsoLatin1::DOWNCASE_PAIRS.values.join
  iso_pattern = /[[:alpha:]()\-#{isolatin1}]+/

  # A description is a run of words that does not start with "<unit> ".
  description_re = /(?!#{unit_pattern}\s)#{iso_pattern}(\s+#{iso_pattern})*/u
  numeric = /#{numeric_pattern}/u
  unit = /#{unit_pattern}/u

  # The group names below are the ones looked up on the MatchData further
  # down; without them those lookups cannot work.
  count_re    = /(?<je>je)?\s*(?<numeric>#{numeric})/
  multiple_re = /(?<numeric>#{numeric})\s*(?<unit>#{unit})?\s*(?<set>[xXà]|Set)/
  measure_re  = /(?<numeric>#{numeric})\s*(?<unit1>#{unit})\s*(?<unit2>#{unit})?/
  addition_re = /(?<numeric>#{numeric})\s*(?<unit>#{unit})?\s*(?<plus>\+)/
  range_re    = /(?<minus>\-)\s*(?<numeric>#{numeric}\s*(?<unit>#{unit})?)/
  scale_re    = /(?<slash>(\/|pro))\s*(?<numeric>#{numeric})?\s*(?<unit>#{unit})/
  dose_re     = /\(\s*#{numeric}\s*#{unit}\s*\)/

  scanner = StringScanner.new(size)
  s_multi = []
  s_comform = ""
  s_count = nil
  s_addition = nil
  s_measure = nil
  s_scale = nil

  until scanner.eos?
    scanner.skip(/\s+/)
    # Order matters: the more specific patterns have to be tried first.
    if scanner.scan(multiple_re)
      m = scanner[0].match(multiple_re)
      # Multipliers appearing after the count has been found are ignored.
      s_multi << [m[:numeric], m[:unit], m[:set]] unless s_count
    elsif scanner.scan(addition_re)
      m = scanner[0].match(addition_re)
      s_addition = [m[:numeric], m[:unit], m[:plus]]
    elsif scanner.scan(measure_re)
      m = scanner[0].match(measure_re)
      # Only the first measure is kept.
      s_measure ||= [m[:numeric], m[:unit1], m[:unit2]]
    elsif scanner.scan(count_re)
      m = scanner[0].match(count_re)
      # Only the first count is kept.
      s_count ||= [m[:je], m[:numeric]]
    elsif scanner.scan(range_re)
      # Ranges ("- <n> <unit>") are consumed but not part of the result.
    elsif scanner.scan(scale_re)
      m = scanner[0].match(scale_re)
      s_scale = [m[:slash], m[:numeric], m[:unit]]
    elsif scanner.scan(dose_re)
      # Parenthesised doses are consumed but not part of the result.
    elsif scanner.scan(description_re)
      s_comform += scanner[0]
    else
      # Nothing recognised: drop the remainder of the current line.
      scanner.scan(/.*/)
    end
  end

  s_comform = nil if s_comform.empty?
  s_count = (s_count ? s_count[1].to_i : 1)
  [
    (s_addition ? s_addition.first.to_i : 0),
    dose_from_multi(s_multi),
    s_count,
    dose_from_measure(s_measure),
    dose_from_scale(s_scale),
    s_comform,
  ]
end

# Built-in sample inputs. Used when 'size.dat' is not available; the
# commented-out entries are further samples that are currently disabled.
str_list = [
  # '9 Suppositorien',
  # '10 ',
  # '200 ml',
  # '10x200 ml',
  # '5 Tüchlein',
  '5 Set',
  # '10 x 5 Mio I.E.',
  # '200 (4 x 50) Tablette(n)',
  # '9x10 Kapsel(n)',
  # '10 x 5 Mio I.E.',
  # '7.5cm x 22.5cm imprägnierter Verband',
  # '2 x 5 x 75 ml',
  # '10 x 1 x 50 ml',
  # '2,2 ml 82 MBq',
  # '1x 5000mg/100ml Durchstechflasche(n)',
  # '50 mg / 25 ml',
]

# Parse every line of 'size.dat' and print the input next to its parse
# result. Falls back to the built-in samples when the data file is missing.
data_file = 'size.dat'
inputs =
  if File.exist?(data_file)
    File.readlines(data_file)
  else
    warn "#{data_file} not found, using built-in samples"
    str_list
  end

inputs.each do |str|
  print "%-25s: " % str.chomp.inspect
  p _parse_size(str)
end