require 'rockit/rockit'

# Parses a package-size description (for example "10x200 ml") with a
# Rockit-generated parser for the OddbSize grammar defined below.
#
# @param size [String] the size text to parse
# @return [Array] seven elements, in this order:
#   [multi, addition, count, measure, scale, dose, comform].
#   After a successful parse, +count+, +scale+ and +measure+ hold the
#   +childrens+ of their parse nodes (or nil when absent), +multi+ holds the
#   +childrens+ of the first Multiple node (or the empty list / nil when there
#   is none), and +addition+, +dose+ and +comform+ are passed through exactly
#   as the parser returned them.
#   When the parser raises ParseException or AmbigousParseException, every
#   element is nil except +count+, which is then the Integer +size.to_i+.
def parse_size(size)
  unit_pattern = '(([kmµucMG]?([glLJm]|mol|Bq)\b)(\/([mµu]?[glL])\b)?)|((Mio\s)?U\.?I\.?)|(%( [mV]\/[mV])?)|(I\.E\.)|(Fl\.)'
  numeric_pattern = '\d+(\'\d+)*([.,]\d+)?'
  iso_pattern = "[[:alpha:]()\-]+"

  # The grammar text depends only on the constant patterns above, so the
  # generated parser is built once, kept in @parser and reused by later calls.
  @parser ||= Parse.generate_parser(<<-EOG)
Grammar OddbSize
  Tokens
    DESCRIPTION = /(?!#{unit_pattern}\s)#{iso_pattern}(\s+#{iso_pattern})*/u
    NUMERIC = /#{numeric_pattern}/u
    SPACE = /\s+/u [:Skip]
    UNIT = /#{unit_pattern}/u
  Productions
    Size -> Multiple* Addition? Count? Measure? Scale? Dose? DESCRIPTION?
    Count -> 'je'? NUMERIC
    Multiple -> NUMERIC UNIT? /[xXà]|Set/u
    Measure -> NUMERIC UNIT UNIT?
    Addition -> NUMERIC UNIT? '+'
    Scale -> '/' NUMERIC? UNIT
    Dose -> '(' NUMERIC UNIT ')'
  EOG

  multi, addition, count, measure, scale, dose, comform = nil
  begin
    multi, addition, count, measure, scale, dose, comform =
      @parser.parse(size).flatten
    #count = (count ? count[1].value.to_i : 1)
    count = count ? count.childrens : nil
    multi = multi ? multi.childrens : nil
    # Guard against a missing Multiple part: calling empty? on nil would raise
    # NoMethodError, which the rescue clause below does not handle.
    multi = multi[0].childrens if multi && !multi.empty?
    scale = scale ? scale.childrens : nil
    measure = measure ? measure.childrens : nil
  rescue ParseException, AmbigousParseException
    # Unparseable input: fall back to the leading integer of the raw string.
    count = size.to_i
  end
  [multi, addition, count, measure, scale, dose, comform]
end

# Sample inputs for a manual check of parse_size.
str_list = [
  '9 Suppositorien',
  '10 ',
  '200 ml',
  '10x200 ml',
  # '5 Tüchlein',
  # '10 Set',
  #'10 x 5 Mio I.E.',
]

# Print each input next to its parse result.
#File.readlines('size.dat').each do |str|
str_list.each do |str|
  print "%-25s: " % str.chomp.inspect
  p parse_size(str.chomp)
end