<< | Index | >>
-rw-r--r-- 1 masa masa 337M 4. Nov 07:19 oddb.org_database.bak.20111104.sql.gz -rw-r--r-- 1 masa masa 363M 3. Nov 07:24 oddb.org.20111102.sql.gz (This is the original file)
The creation of some index tables appears to have failed:
TypeError: can't convert String into Integer when updating index 'sequence_generic_type_index' with a ODDB::Sequence Encoding::CompatibilityError: incompatible character encodings: UTF-8 and ASCII-8BIT when updating index 'analysis_index_de' with a ODDB::Analysis::Position Encoding::CompatibilityError: incompatible character encodings: UTF-8 and ASCII-8BIT when updating index 'analysis_index_fr' with a ODDB::Analysis::Position NoMethodError: undefined method `values' for nil:NilClass when updating index 'sequence_vaccine' with a ODDB::Registration
Delete parts
require 'rockit/rockit'
# Load the Rockit parser generator, which provides Parse.generate_parser.
require 'rockit/rockit'
# Generate a parser from the grammar in the heredoc below and keep it in
# @parser, where #parse picks it up.
# NOTE(review): the grammar is named "OddbSize" although its productions
# describe pointers (Pointer / Step / Arg) -- confirm whether the name matters.
@parser = Parse.generate_parser <<-EOG
Grammar OddbSize
  Tokens
    STEP = /!/u
    ARG = /,/u
    PTR = /:/u
    PEND = /\\./u
    EXPR = /([^!,:.%]|%[!,:.%])+/u
  Productions
    Pointer -> PTR Step* PEND? [: _, steps, _]
    Step -> STEP EXPR Arg* [: _, command, arguments]
    Arg -> ARG (EXPR | Pointer) [: _, argument]
EOG
# Parse +string+ with @parser, call compact! on the resulting tree
# (presumably to drop nil nodes -- confirm against Rockit's AST type) and
# return whatever produce_pointer builds from it. produce_pointer is defined
# outside this excerpt.
def parse(string)
  ast = @parser.parse(string)
  ast.compact!
  produce_pointer(ast)
end
Replace
# Replacement for pointer parsing: pointers are no longer accepted, so every
# call raises.
#
# value - the rejected pointer input; passed on to the error unchanged.
#
# Raises SBSM::InvalidDataError built with the message key
# "e_not_available_pointer", the field key :pointer and +value+.
def pointer(value)
  failure = SBSM::InvalidDataError.new("e_not_available_pointer", :pointer, value)
  raise failure
end
The condition to use Part#size= in update_bsv
The condition to use Part#size= in update_swissmedic
module ODDB module SizeParser unit_pattern = '(([kmµucMG]?([glLJm]|mol|Bq)\b)(\/([mµu]?[glL])\b)?)|((Mio\s)?U\.?I\.?)|(%( [mV]\/[mV])?)|(I\.E\.)|(Fl\.)' numeric_pattern = '\d+(\'\d+)*([.,]\d+)?' iso_pattern = "[[:alpha:]()\-]+" @@parser = Parse.generate_parser <<-EOG Grammar OddbSize Tokens DESCRIPTION = /(?!#{unit_pattern}\s)#{iso_pattern}(\s+#{iso_pattern})*/u NUMERIC = /#{numeric_pattern}/u SPACE = /\s+/u [:Skip] UNIT = /#{unit_pattern}/u Productions Size -> Multiple* Addition? Count? Measure? Scale? Dose? DESCRIPTION? Count -> 'je'? NUMERIC Multiple -> NUMERIC UNIT? /[xXà]|Set/u Measure -> NUMERIC UNIT UNIT? Addition -> NUMERIC UNIT? '+' Scale -> '/' NUMERIC? UNIT Dose -> '(' NUMERIC UNIT ')' EOG
# Split a package-size string into its components using the Rockit parser
# held in @@parser.
#
# size - the size description to parse.
#
# Returns a six-element Array:
#   [addition, multi dose, count, measure dose, scale dose, description]
# where addition defaults to 0, count defaults to 1 when the parse succeeds
# without a Count node, and the dose entries come from the dose_from_* helpers.
# When the parser raises ParseException or AmbigousParseException, count falls
# back to size.to_i and every other parsed component stays nil.
def parse_size(size)
  multi = addition = count = measure = scale = dose = comform = nil
  begin
    nodes = @@parser.parse(size).flatten
    multi, addition, count, measure, scale, dose, comform = nodes
    # The value is read from the second child of the Count node.
    count = count ? count[1].value.to_i : 1
  rescue ParseException, AmbigousParseException
    count = size.to_i
  end
  extra = addition ? addition.first.value.to_i : 0
  [
    extra,
    dose_from_multi(multi),
    count,
    dose_from_measure(measure),
    dose_from_scale(scale),
    (comform.value if comform),
  ]
end
Simple Rockit sample
# Minimal comparison of a Rockit grammar with three equivalent ways of
# building a plain Regexp for the same input: "a", an optional number, a word.
require 'rockit/rockit'

# 1. Rockit: whitespace is skipped via the Blank token; Number is optional.
parser = Parse.generate_parser <<-'END_OF_GRAMMAR'
Grammar ExampleGrammar
  Tokens
    Blank = /\s+/ [:Skip]
    Number = /\d+/
    Alphabet = /\w+/
  Productions
    Expr -> 'a' Number? Alphabet
END_OF_GRAMMAR

str1 = "a 123 abc"
str2 = "a abc"
p parser.parse(str1).flatten
p parser.parse(str2).flatten

# 2. A single Regexp literal; to_a[1, 10] drops the full match and keeps the
#    capture groups.
regexp = /(a)\s*(\d+)?\s*(\w+)/
p regexp.match(str1).to_a[1, 10]
p regexp.match(str2).to_a[1, 10]

# 3. The same pattern assembled from String fragments joined with '\s*'.
reg1 = '(a)'
reg2 = '(\d+)?'
reg3 = '(\w+)'
regexp = [reg1, reg2, reg3].join('\s*')
regexp = Regexp.new(regexp)
p regexp
p regexp.match(str1).to_a[1, 10]
p regexp.match(str2).to_a[1, 10]

# 4. The same pattern assembled by interpolating Regexp objects; each
#    interpolated Regexp is embedded as a (?-mix:...) group, so the "?" after
#    #{reg2} applies to that whole group.
reg1 = /(a)/
reg2 = /(\d+)/
reg3 = /(\w+)/
regexp = /#{reg1}\s*#{reg2}?\s*#{reg3}/
p regexp
p regexp.match(str1).to_a[1, 10]
p regexp.match(str2).to_a[1, 10]
Result
["a", "123", "abc"] ["a", nil, "abc"] ["a", "123", "abc"] ["a", nil, "abc"] /(a)\s*(\d+)?\s*(\w+)/ ["a", "123", "abc"] ["a", nil, "abc"] /(?-mix:(a))\s*(?-mix:(\d+)?)\s*(?-mix:(\w+))/ ["a", "123", "abc"] ["a", nil, "abc"]
Experiment
# Experimental plain-Regexp replacement for the Rockit-based size parser.
#
# Builds one regular expression with the named groups multiple, addition,
# count, measure, scale, dose and description (in that order) and matches
# +size+ against it. Because the pattern uses named groups, the unnamed
# parentheses inside the fragments do not capture.
#
# size - the String to match, e.g. "200 ml".
#
# Returns the MatchData from String#match. Every component is optional, so the
# pattern can also match an empty string.
#
# NOTE(review): count is tried before measure, so for "200 ml" the number is
# taken as count and "ml" ends up in description -- confirm whether that is the
# intended split.
def _parse_size(size)
  unit_pattern = /(([kmµucMG]?([glLJm]|mol|Bq)\b)(\/([mµu]?[glL])\b)?)|((Mio\s)?U\.?I\.?)|(%( [mV]\/[mV])?)|(I\.E\.)|(Fl\.)/
  numeric_pattern = /\d+(\'\d+)*([.,]\d+)?/
  iso_pattern = /[[:alpha:]()\-]+/

  # Free text that does not start with a unit followed by whitespace.
  description = /(?!#{unit_pattern}\s)#{iso_pattern}(\s+#{iso_pattern})*/u
  numeric = /#{numeric_pattern}/u
  unit = /#{unit_pattern}/u

  count = /(je)?\s*#{numeric}/
  multiple = /(#{numeric})\s*(#{unit})?\s*([xXà]|Set)/
  measure = /#{numeric}\s*#{unit}\s*#{unit}?/
  addition = /#{numeric}\s*#{unit}?\s*\+/
  # Fixed: the whitespace after the slash was written as "\/s*", which matched
  # literal "s" characters instead of optional whitespace.
  scale = /\/\s*#{numeric}?\s*#{unit}/
  dose = /\(\s*#{numeric}\s*#{unit}\s*\)/

  parser = /(?<multiple>#{multiple}*)\s*(?<addition>#{addition}?)\s*(?<count>#{count}?)\s*(?<measure>#{measure}?)\s*(?<scale>#{scale}?)\s*(?<dose>#{dose}?)\s*(?<description>#{description}?)/
  size.match(parser)
end
Note
Experiment
Result
[_ArrayNode, nil, nil, Measure:["200","ml",nil], nil, nil, nil]
Result
["200 ml", "", "", "200", "", "", "", "ml"]
Note
["200 ml", "", "", "", "200 ml", "", "", ""]