# frozen_string_literal: true

require 'strscan'

module ODDB
  module Util
    module IsoLatin1
      # Upper-case => lower-case pairs for accented Latin letters.
      DOWNCASE_PAIRS = {
        "Å" => "å", "Æ" => "æ", "Ä" => "ä", "Á" => "á", "Â" => "â", "À" => "à",
        "Ã" => "ã", "Ą" => "ą", "Ǎ" => "ǎ", "Ă" => "ă", "Ā" => "ā", "Ȧ" => "ȧ",
        "Ḃ" => "ḃ", "Ç" => "ç", "Ĉ" => "ĉ", "Č" => "č", "Ć" => "ć", "Ċ" => "ċ",
        "Ḑ" => "ḑ", "Đ" => "đ", "Ð" => "ð", "Ď" => "ď", "Ḋ" => "ḋ", "Ë" => "ë",
        "É" => "é", "Ê" => "ê", "È" => "è", "Ȩ" => "ȩ", "Ę" => "ę", "Ě" => "ě",
        "Ĕ" => "ĕ", "Ẽ" => "ẽ", "Ē" => "ē", "Ė" => "ė", "Þ" => "þ", "Ḟ" => "ḟ",
        "Ģ" => "ģ", "Ǧ" => "ǧ", "Ğ" => "ğ", "Ǵ" => "ǵ", "Ĝ" => "ĝ", "Ḡ" => "ḡ",
        "Ġ" => "ġ", "Ȟ" => "ȟ", "Ĥ" => "ĥ", "Ḧ" => "ḧ", "Ḩ" => "ḩ", "Ḣ" => "ḣ",
        "Ï" => "ï", "Í" => "í", "Î" => "î", "Ì" => "ì", "Į" => "į", "Ǐ" => "ǐ",
        "Ĭ" => "ĭ", "Ĩ" => "ĩ", "İ" => "ı", "Ĵ" => "ĵ", "Ǩ" => "ǩ", "Ḱ" => "ḱ",
        "Ķ" => "ķ", "Ł" => "ł", "Ĺ" => "ĺ", "Ľ" => "ľ", "Ļ" => "ļ", "Ḿ" => "ḿ",
        "Ṁ" => "ṁ", "Ň" => "ň", "Ń" => "ń", "Ñ" => "ñ", "Ǹ" => "ǹ", "Ņ" => "ņ",
        "Ṅ" => "ṅ", "Œ" => "œ", "Ö" => "ö", "Ó" => "ó", "Ô" => "ô", "Ò" => "ò",
        "Õ" => "õ", "Ō" => "ō", "Ŏ" => "ŏ", "Ø" => "ø", "Ǫ" => "ǫ", "Ǒ" => "ǒ",
        "Ȯ" => "ȯ", "Ṕ" => "ṕ", "Ṗ" => "ṗ", "Ř" => "ř", "Ŕ" => "ŕ", "Ŗ" => "ŗ",
        "Ṙ" => "ṙ", "Ś" => "ś", "Ŝ" => "ŝ", "Š" => "š", "Ş" => "ş", "Ṡ" => "ṡ",
        "Ť" => "ť", "Ţ" => "ţ", "Ṫ" => "ṫ", "Ü" => "ü", "Ú" => "ú", "Û" => "û",
        "Ù" => "ù", "Ų" => "ų", "Ǘ" => "ǘ", "Ǔ" => "ǔ", "Ǚ" => "ǚ", "Ǜ" => "ǜ",
        "Ũ" => "ũ", "Ŭ" => "ŭ", "Ů" => "ů", "Ǖ" => "ǖ", "Ṽ" => "ṽ", "Ẃ" => "ẃ",
        "Ŵ" => "ŵ", "Ẁ" => "ẁ", "Ẅ" => "ẅ", "Ẇ" => "ẇ", "Ẍ" => "ẍ", "Ẋ" => "ẋ",
        "Ÿ" => "ÿ", "Ẏ" => "ẏ", "Ỹ" => "ỹ", "Ỳ" => "ỳ", "Ŷ" => "ŷ", "Ý" => "ý",
        "Ȳ" => "ȳ", "Ž" => "ž", "Ź" => "ź", "Ẑ" => "ẑ", "Ż" => "ż"
      }.freeze

      # Character class matching any single upper-case key of DOWNCASE_PAIRS.
      DOWNCASE_PTRN = /[#{DOWNCASE_PAIRS.keys.join}]/u
    end
  end
end

# Tokenises a package-size string (e.g. "10x200 ml") with a StringScanner.
#
# At every scanner position the token kinds are tried in this order:
# addition, multiple, measure, count, scale, dose, description. If none
# matches, the remainder of the current line is discarded. When the same kind
# matches more than once, the last match overwrites the earlier one.
#
# @param size [String] the text to tokenise
# @return [Array] seven slots, in this order:
#   0. multiple    - [numeric, unit_or_nil, separator], or [] when absent
#   1. addition    - matched text such as "10 ml +", or nil
#   2. count       - [je_or_nil, numeric], or nil
#   3. measure     - [numeric, unit1, unit2_or_nil], or nil
#   4. scale       - matched text such as "/ 5 ml", or nil
#   5. dose        - matched text such as "(50 mg)", or nil
#   6. description - matched free text, or nil
def _parse_size(size)
  unit_pattern = /(([kmµucMG]?([glLJm]|mol|Bq)\b)(\/([mµu]?[glL])\b)?)|((Mio\s)?U\.?I\.?)|(%( [mV]\/[mV])?)|(I\.E\.)|(Fl\.)/
  numeric_pattern = /\d+(\'\d+)*([.,]\d+)?/
  isolatin1 = ODDB::Util::IsoLatin1::DOWNCASE_PAIRS.values.join
  iso_pattern = /[[:alpha:]()\-#{isolatin1}]+/

  # A description must not begin with something that reads as "<unit> ".
  description = /(?!#{unit_pattern}\s)#{iso_pattern}(\s+#{iso_pattern})*/u
  numeric = /#{numeric_pattern}/u
  unit = /#{unit_pattern}/u

  # Because these patterns use named groups, the plain parentheses inside the
  # interpolated sub-patterns do not capture.
  count = /(?<je>je)?\s*(?<numeric>#{numeric})/
  # The trailing lookahead keeps words that merely start with x/X/à/Set
  # (e.g. "10 Xanax") from being read as a multiplier.
  multiple = /(?<numeric>#{numeric})\s*(?<unit>#{unit})?\s*(?<set>[xXà]|Set)(?![[:alpha:]])/
  measure = /(?<numeric>#{numeric})\s*(?<unit1>#{unit})\s*(?<unit2>#{unit})?/
  addition = /#{numeric}\s*#{unit}?\s*\+/
  # Slash followed by optional whitespace (the previous pattern had `\/s*`,
  # which matched literal "s" characters instead of whitespace).
  scale = /\/\s*#{numeric}?\s*#{unit}/
  dose = /\(\s*#{numeric}\s*#{unit}\s*\)/

  multi = addition_text = counted = measured = scale_text = dose_text = description_text = nil

  s = StringScanner.new(size)
  until s.eos?
    s.skip(/\s+/)
    # `multiple` has to be tried before `measure` and `count`: `count` accepts
    # any leading number and would otherwise always win, leaving the
    # multiplier branch unreachable.
    if s.scan(addition)
      addition_text = s[0]
    elsif s.scan(multiple)
      multi = [s[:numeric], s[:unit], s[:set]]
    elsif s.scan(measure)
      measured = [s[:numeric], s[:unit1], s[:unit2]]
    elsif s.scan(count)
      counted = [s[:je], s[:numeric]]
    elsif s.scan(scale)
      scale_text = s[0]
    elsif s.scan(dose)
      dose_text = s[0]
    elsif s.scan(description)
      description_text = s[0]
    else
      # Nothing recognised here: drop the rest of the line so the loop
      # always makes progress.
      s.scan(/.*/)
    end
  end

  [multi || [], addition_text, counted, measured, scale_text, dose_text, description_text]
end

# Ad-hoc driver: prints the parse result for a few sample strings.
str_list = [
  '9 Suppositorien',
  '10 ',
  '200 ml',
  '10x200 ml',
  # '5 Tüchlein',
  # '10 Set',
  # '10 x 5 Mio I.E.',
]

# File.readlines('size.dat').each do |str|
str_list.each do |str|
  print format('%-25s: ', str.chomp.inspect)
  p _parse_size(str)
end