require 'rockit/rockit'

# Splits a package-size description (e.g. "200 ml") into its grammatical
# parts using a rockit-generated parser for the "OddbSize" grammar below.
#
# size - the text to parse.
#
# Returns a seven-element Array
#   [multi, addition, count, measure, scale, dose, comform]
# taken positionally from the flattened parse result; missing positions are
# nil. When the parser raises ParseException or AmbigousParseException the
# result is all nil except for count, which is set to size.to_i.
#
# Side effect: assigns the parser to @parser on the receiver (as the
# original code did) and caches it in @oddb_size_parser.
def parse_size(size)
  unit_pattern = '(([kmµucMG]?([glLJm]|mol|Bq)\b)(\/([mµu]?[glL])\b)?)|((Mio\s)?U\.?I\.?)|(%( [mV]\/[mV])?)|(I\.E\.)|(Fl\.)'
  numeric_pattern = '\d+(\'\d+)*([.,]\d+)?'
  iso_pattern = "[[:alpha:]()\-]+"

  # The grammar is assembled from constant strings only, so the generated
  # parser is identical on every call; build it once and reuse it instead of
  # regenerating it each time.
  @oddb_size_parser ||= Parse.generate_parser(<<-EOG)
Grammar OddbSize
  Tokens
    DESCRIPTION = /(?!#{unit_pattern}\s)#{iso_pattern}(\s+#{iso_pattern})*/u
    NUMERIC = /#{numeric_pattern}/u
    SPACE = /\s+/u [:Skip]
    UNIT = /#{unit_pattern}/u
  Productions
    Size -> Multiple* Addition? Count? Measure? Scale? Dose? DESCRIPTION?
    Count -> 'je'? NUMERIC
    Multiple -> NUMERIC UNIT? /[xXà]|Set/u
    Measure -> NUMERIC UNIT UNIT?
    Addition -> NUMERIC UNIT? '+'
    Scale -> '/' NUMERIC? UNIT
    Dose -> '(' NUMERIC UNIT ')'
  EOG
  # Keep the original observable side effect of exposing the parser here.
  @parser = @oddb_size_parser

  parts = begin
    @parser.parse(size).flatten
  rescue ParseException, AmbigousParseException
    # Unparseable input: fall back to the leading integer as the count
    # (third slot), leaving every other slot nil.
    [nil, nil, size.to_i]
  end

  # Destructure and rebuild so the result always has exactly seven slots,
  # padding with nil or dropping extras just like the original assignment.
  multi, addition, count, measure, scale, dose, comform = parts
  [multi, addition, count, measure, scale, dose, comform]
end

p parse_size('200 ml')