<< | Index | >>
-rw-r--r-- 1 masa masa 337M 4. Nov 07:19 oddb.org_database.bak.20111104.sql.gz -rw-r--r-- 1 masa masa 363M 3. Nov 07:24 oddb.org.20111102.sql.gz (This is the original file)
The creation of some index tables appears to have failed:
TypeError: can't convert String into Integer when updating index 'sequence_generic_type_index' with a ODDB::Sequence Encoding::CompatibilityError: incompatible character encodings: UTF-8 and ASCII-8BIT when updating index 'analysis_index_de' with a ODDB::Analysis::Position Encoding::CompatibilityError: incompatible character encodings: UTF-8 and ASCII-8BIT when updating index 'analysis_index_fr' with a ODDB::Analysis::Position NoMethodError: undefined method `values' for nil:NilClass when updating index 'sequence_vaccine' with a ODDB::Registration
Delete parts
require 'rockit/rockit'
require 'rockit/rockit'
# Generate a Rockit parser from the "OddbSize" grammar in the heredoc below and
# keep it in @parser.
# The heredoc is an interpolating one, so the `\\.` written for PEND reaches
# the grammar text as `\.` (a literal dot).
# Productions as written:
#   Pointer - PTR, any number of Steps, an optional PEND
#   Step    - STEP, an EXPR, any number of Args
#   Arg     - ARG followed by either an EXPR or a nested Pointer
# The bracketed lines under each production name the parts of the match; `_`
# is used for the separator tokens.
@parser = Parse.generate_parser <<-EOG
Grammar OddbSize
Tokens
STEP = /!/u
ARG = /,/u
PTR = /:/u
PEND = /\\./u
EXPR = /([^!,:.%]|%[!,:.%])+/u
Productions
Pointer -> PTR Step* PEND?
[: _, steps, _]
Step -> STEP EXPR Arg*
[: _, command, arguments]
Arg -> ARG (EXPR | Pointer)
[: _, argument]
EOG
# Parses +string+ with @parser, removes nil entries from the resulting AST in
# place (compact!), and returns whatever produce_pointer builds from it.
def parse(string)
  produce_pointer(@parser.parse(string).tap(&:compact!))
end
Replace
# Never returns: always raises an SBSM::InvalidDataError built from
# "e_not_available_pointer", :pointer and the given +value+.
def pointer(value)
  failure = SBSM::InvalidDataError.new("e_not_available_pointer", :pointer, value)
  raise failure
end
The condition to use Part#size= in update_bsv
The condition to use Part#size= in update_swissmedic
module ODDB
module SizeParser
# Regexp source fragments that are interpolated into the grammar heredoc below.
#
# unit_pattern: alternation of unit spellings -
#   * an optional prefix letter plus g/l/L/J/m/mol/Bq, optionally followed by
#     "/" and a g/l/L unit with its own optional prefix,
#   * "UI" with optional dots, optionally preceded by "Mio ",
#   * "%" optionally followed by a " m/V"-style suffix,
#   * "I.E.",
#   * "Fl."
unit_pattern = '(([kmµucMG]?([glLJm]|mol|Bq)\b)(\/([mµu]?[glL])\b)?)|((Mio\s)?U\.?I\.?)|(%( [mV]\/[mV])?)|(I\.E\.)|(Fl\.)'
# numeric_pattern: digits, optional '-separated digit groups, optional
# fractional part introduced by "." or ",".
numeric_pattern = '\d+(\'\d+)*([.,]\d+)?'
# iso_pattern: one or more letters, parentheses or hyphens.
iso_pattern = "[[:alpha:]()\-]+"
# Generate the Rockit parser for size strings and keep it in the class
# variable @@parser.
# NOTE(review): the heredoc is an interpolating one, so a `\s` typed directly
# in the heredoc text is a Ruby string escape for a single space before Rockit
# ever sees it; only the `\s` inside the single-quoted unit_pattern survives
# as a backslash sequence. Confirm this is intended for SPACE and DESCRIPTION.
# NOTE(review): unit_pattern is spliced in as raw text with a top-level `|`,
# so in `(?!#{unit_pattern}\s)` the trailing `\s` presumably attaches to the
# last alternative only - verify against how Rockit compiles token regexps.
@@parser = Parse.generate_parser <<-EOG
Grammar OddbSize
Tokens
DESCRIPTION = /(?!#{unit_pattern}\s)#{iso_pattern}(\s+#{iso_pattern})*/u
NUMERIC = /#{numeric_pattern}/u
SPACE = /\s+/u [:Skip]
UNIT = /#{unit_pattern}/u
Productions
Size -> Multiple* Addition? Count? Measure? Scale? Dose? DESCRIPTION?
Count -> 'je'? NUMERIC
Multiple -> NUMERIC UNIT? /[xXà]|Set/u
Measure -> NUMERIC UNIT UNIT?
Addition -> NUMERIC UNIT? '+'
Scale -> '/' NUMERIC? UNIT
Dose -> '(' NUMERIC UNIT ')'
EOG
# Parses a size string with @@parser and returns a six-element array:
#   [addition, dose_from_multi(multi), count, dose_from_measure(measure),
#    dose_from_scale(scale), description]
# * addition    - integer value of the first node of the Addition match, 0 if absent
# * count       - integer value of the Count match, 1 if absent
# * description - value of the trailing DESCRIPTION match, nil if absent
# When the parser raises ParseException or AmbigousParseException, every part
# stays nil and count falls back to size.to_i.
def parse_size(size)
  multi = addition = count = measure = scale = dose = comform = nil
  begin
    nodes = @@parser.parse(size).flatten
    multi, addition, count, measure, scale, dose, comform = nodes
    count = count ? count[1].value.to_i : 1
  rescue ParseException, AmbigousParseException
    count = size.to_i
  end
  # Elements are appended in the same order the helpers were originally called.
  parts = []
  parts << (addition ? addition.first.value.to_i : 0)
  parts << dose_from_multi(multi)
  parts << count
  parts << dose_from_measure(measure)
  parts << dose_from_scale(scale)
  parts << (comform ? comform.value : nil)
  parts
end
Simple Rockit sample
require 'rockit/rockit'
# Minimal Rockit grammar: the literal 'a', an optional Number, then an
# Alphabet token; Blank (whitespace) is skipped. The heredoc is single-quoted,
# so the backslashes in the token regexps are passed through unchanged.
parser = Parse.generate_parser <<-'END_OF_GRAMMAR'
Grammar ExampleGrammar
Tokens
Blank = /\s+/ [:Skip]
Number = /\d+/
Alphabet = /\w+/
Productions
Expr -> 'a' Number? Alphabet
END_OF_GRAMMAR
# Two inputs: one with the optional number, one without.
str1 = "a 123 abc"
str2 = "a abc"
# 1) Rockit: print the flattened parse result for each input.
p parser.parse(str1).flatten
p parser.parse(str2).flatten
# 2) The same structure as one regexp literal. to_a[1, 10] drops the full
#    match (index 0) and keeps up to ten capture groups.
regexp = /(a)\s*(\d+)?\s*(\w+)/
p regexp.match(str1).to_a[1, 10]
p regexp.match(str2).to_a[1, 10]
# 3) The same regexp assembled from string fragments joined with '\s*' and
#    compiled with Regexp.new.
reg1 = '(a)'
reg2 = '(\d+)?'
reg3 = '(\w+)'
regexp = [reg1, reg2, reg3].join('\s*')
regexp = Regexp.new(regexp)
p regexp
p regexp.match(str1).to_a[1, 10]
p regexp.match(str2).to_a[1, 10]
# 4) The same regexp assembled by interpolating Regexp objects; each
#    interpolated Regexp is embedded as a (?-mix:...) group, and the `?` after
#    #{reg2} makes that whole group optional.
reg1 = /(a)/
reg2 = /(\d+)/
reg3 = /(\w+)/
regexp = /#{reg1}\s*#{reg2}?\s*#{reg3}/
p regexp
p regexp.match(str1).to_a[1, 10]
p regexp.match(str2).to_a[1, 10]
Result
["a", "123", "abc"] ["a", nil, "abc"] ["a", "123", "abc"] ["a", nil, "abc"] /(a)\s*(\d+)?\s*(\w+)/ ["a", "123", "abc"] ["a", nil, "abc"] /(?-mix:(a))\s*(?-mix:(\d+)?)\s*(?-mix:(\w+))/ ["a", "123", "abc"] ["a", nil, "abc"]
Experiment
# Experimental regexp-only counterpart of the Rockit size grammar.
#
# Builds one regexp out of the same fragments the grammar uses (unit, numeric,
# description) and matches +size+ against it. The parts are exposed as the
# named groups multiple, addition, count, measure, scale, dose and description.
# Because the final pattern uses named groups, the unnamed parentheses inside
# the fragments are not captured, so MatchData#to_a holds the full match
# followed by exactly those seven groups.
#
# Every part is optional and the match is not anchored, so a group that did not
# take part in the match is "" rather than nil. The parts are tried greedily
# from left to right: for "200 ml" the number is consumed by count and "ml"
# ends up in description.
#
# @param size [String] the size text to analyse
# @return [MatchData, nil] the result of String#match
def _parse_size(size)
  unit_pattern = /(([kmµucMG]?([glLJm]|mol|Bq)\b)(\/([mµu]?[glL])\b)?)|((Mio\s)?U\.?I\.?)|(%( [mV]\/[mV])?)|(I\.E\.)|(Fl\.)/
  numeric_pattern = /\d+(\'\d+)*([.,]\d+)?/
  iso_pattern = /[[:alpha:]()\-]+/
  description = /(?!#{unit_pattern}\s)#{iso_pattern}(\s+#{iso_pattern})*/u
  numeric = /#{numeric_pattern}/u
  unit = /#{unit_pattern}/u
  count = /(je)?\s*#{numeric}/
  multiple = /(#{numeric})\s*(#{unit})?\s*([xXà]|Set)/
  measure = /#{numeric}\s*#{unit}\s*#{unit}?/
  addition = /#{numeric}\s*#{unit}?\s*\+/
  # Whitespace is allowed after the slash, as at every other joint. This used
  # to read `\/s*`, which matched literal "s" characters instead, so a scale
  # such as "/ 5 ml" was never recognised.
  scale = /\/\s*#{numeric}?\s*#{unit}/
  dose = /\(\s*#{numeric}\s*#{unit}\s*\)/
  parser = /(?<multiple>#{multiple}*)\s*(?<addition>#{addition}?)\s*(?<count>#{count}?)\s*(?<measure>#{measure}?)\s*(?<scale>#{scale}?)\s*(?<dose>#{dose}?)\s*(?<description>#{description}?)/
  size.match(parser)
end
Note
Experiment
Result
[_ArrayNode, nil, nil, Measure:["200","ml",nil], nil, nil, nil]
Result
["200 ml", "", "", "200", "", "", "", "ml"]
Note
["200 ml", "", "", "", "200 ml", "", "", ""]