<< | Index | >>
# Prefixes every line of migel_swissINDEX.CSV with a unique ID and writes the
# result to stdout.
#
# The lookup table is built from SourceMuG_2011_02_28_Aktuelle_IDs.csv: for each
# comma-separated row, the integer value of field 6 (index 5) is the key and the
# integer value of field 1 (index 0) is the ID. Rows whose key parses to 0 are
# skipped; a later row with the same key overwrites an earlier one.
id_by_pharmacode = {}
seen_pharmacodes = []

File.readlines('SourceMuG_2011_02_28_Aktuelle_IDs.csv').each do |row|
  fields = row.split(/,/)
  key = fields[5].to_i
  # nil or non-numeric fields parse to 0 and are ignored
  next if key.zero?

  id_by_pharmacode[key] = fields[0].to_i
  seen_pharmacodes << key
end

# for check: compare the number of collected keys with the number of distinct keys
# p seen_pharmacodes.length
# p seen_pharmacodes.uniq.length
# exit

File.readlines('migel_swissINDEX.CSV').each_with_index do |row, index|
  if index.zero?
    # First line gets the label of the new leading column.
    # NOTE(review): the label is spelled "Eindeuige-ID" (possibly meant
    # "Eindeutige-ID"); kept unchanged so the generated output stays the same.
    print "Eindeuige-ID;", row
    next
  end

  # The second semicolon-separated field is used as the lookup key.
  pharmacode = row.split(/;/)[1].to_i
  unique_id = id_by_pharmacode[pharmacode]

  # Lines without a known ID still get the leading ";" so the columns line up.
  print unique_id if unique_id
  print ";", row
end
Run
ruby put_uniq_id.rb > new.csv
Problem
Run
Note
1. lib/oddb/import/gkv.rb#import_row
def import_row(row) if row == :doubtful @doubtful_pzns.push @created_pzn if @created_pzn @created_pzn = nil return end @count += 1 package = import_package(row) return if package.nil? @confirmed_pzns.store(package.code(:cid).value, true) if(code = package.code(:zuzahlungsbefreit)) if(code.value) @existing += 1 else @created += 1 end code.value = true else @created += 1 code = Util::Code.new(:zuzahlungsbefreit, true, 'DE') package.add_code(code) end
Note
2. lib/oddb/import/gkv.rb#postprocess
def postprocess Drugs::Package.search_by_code(:type => 'zuzahlungsbefreit', :value => 'true', :country => 'DE').each { |package| pzn = package.code(:cid).value unless(@confirmed_pzns.include?(pzn)) @deleted += 1 package.code(:zuzahlungsbefreit).value = false save package end } unless(@confirmed_pzns.empty?)
Note
Experiment (check whether @confirmed_pzns is really updated)
def process_page rows rows.each_with_index do |row, i| print i, "/", rows.length," ", row.to_s, "\n"
Result
... 6/16 ACC 200 PULVER0253103HEXAL AGAcetylcystein200mg100StPulver15,37 7/16 ACC 200 PULVER0253095HEXAL AGAcetylcystein200mg50StPulver12,72 8/16 ACC 200 PULVER0253089HEXAL AGAcetylcystein200mg20StPulver11,00 9/16 ACC 2004789763HEXAL AGAcetylcystein200mg20StBrausetabletten11,00 10/16 ACC 2003867225HEXAL AGAcetylcystein200mg100StBrausetabletten15,37 11/16 doubtful 12/16 doubtful 13/16 doubtful 14/16 doubtful 15/16 doubtful 0/16 doubtful ...
Note
Experiment (lib/oddb/import/gkv.rb#send_line_break)
def send_line_break data = @current_line.strip.split /\s{3,}/ p data puts "="*20
Result
masa@masa ~/ywesee/de.oddb.org $ ruby -I lib jobs/import_gkv ... ["639"] ==================== ["ACC 600 TABS", "0434230", "HEXAL AG", "Acetylcystein", "600", "mg", "20", "St", "Tabletten", "12,16"] ==================== ["ACC 600 TABS", "0434224", "HEXAL AG", "Acetylcystein", "600", "mg", "10", "St", "Tabletten", "11,00"] ==================== ["ACC 200 TABS", "0451145", "HEXAL AG", "Acetylcystein", "200", "mg", "100", "St", "Tabletten", "15,37"] ==================== ["ACC 200 TABS", "0451139", "HEXAL AG", "Acetylcystein", "200", "mg", "50", "St", "Tabletten", "12,72"] ==================== ["ACC 200 TABS", "0451122", "HEXAL AG", "Acetylcystein", "200", "mg", "20", "St", "Tabletten", "11,00"] ==================== ["ACC 200 PULVER", "0253103", "HEXAL AG", "Acetylcystein", "200", "mg", "100", "St", "Pulver", "15,37"] ==================== ["ACC 200 PULVER", "0253095", "HEXAL AG", "Acetylcystein", "200", "mg", "50", "St", "Pulver", "12,72"] ==================== ["ACC 200 PULVER", "0253089", "HEXAL AG", "Acetylcystein", "200", "mg", "20", "St", "Pulver", "11,00"] ==================== ["ACC 200", "4789763", "HEXAL AG", "Acetylcystein", "200", "mg", "20", "St", "Brausetabletten", "11,00"] ==================== ["ACC 200", "3867225", "HEXAL AG", "Acetylcystein", "200", "mg", "100", "St", "Brausetabletten", "15,37"] ==================== ["Zuzahlungsbefreite Arzneimittel nach § 31 Abs. 3 Satz 4 SGB V"] ==================== ["Produktstand", "15 . 05 . 
2011"] ==================== ["sortiert nach Arzneimittelname"] ==================== ["Arzneimittelname", "PZN", "Hersteller", "Wirkstoff(e)", "Wirkstärke(n)", "Packungs-", "Darreichungsform", "Apothekenverkaufspreis"] ==================== ["größe", "inkl.MwSt"] ==================== 0/16 doubtful 1/16 ACC 600 TABS0434230HEXAL AGAcetylcystein600mg20StTabletten12,16 2/16 ACC 600 TABS0434224HEXAL AGAcetylcystein600mg10StTabletten11,00 3/16 ACC 200 TABS0451145HEXAL AGAcetylcystein200mg100StTabletten15,37 4/16 ACC 200 TABS0451139HEXAL AGAcetylcystein200mg50StTabletten12,72 5/16 ACC 200 TABS0451122HEXAL AGAcetylcystein200mg20StTabletten11,00 6/16 ACC 200 PULVER0253103HEXAL AGAcetylcystein200mg100StPulver15,37 7/16 ACC 200 PULVER0253095HEXAL AGAcetylcystein200mg50StPulver12,72 8/16 ACC 200 PULVER0253089HEXAL AGAcetylcystein200mg20StPulver11,00 9/16 ACC 2004789763HEXAL AGAcetylcystein200mg20StBrausetabletten11,00 10/16 ACC 2003867225HEXAL AGAcetylcystein200mg100StBrausetabletten15,37 11/16 doubtful 12/16 doubtful 13/16 doubtful 14/16 doubtful 15/16 doubtful ...
Note
Experiment
masa@masa ~/ywesee/rpdf2txt $ ruby -I lib bin/rpdf2txt zubef.pdf
Result
... ACC 200 4789763 HEXAL AG Acetylcystein 200 mg 20 St Brausetabletten 11,00 ACC 200 3867225 HEXAL AG Acetylcystein 200 mg 100 St Brausetabletten 15,37 Zuzahlungsbefreite Arzneimittel nach § 31 Abs. 3 Satz 4 SGB V Produktstand 15 . 05 . 2011 sortiert nach Arzneimittelname Arzneimittelname PZN Hersteller Wirkstoff(e) Wirkstärke(n) Packungs- Darreichungsform Apothekenverkaufspreis größe inkl.MwSt ACC 200 3867219 HEXAL AG Acetylcystein 200 mg 50 St Brausetabletten 12,72 639 ACEMUC 200 3711606 betapharm Arzneimittel GmbH Acetylcystein 200 mg 100 St Brausetabletten 15,37 ACEMUC 200 3711598 betapharm Arzneimittel GmbH Acetylcystein 200 mg 50 St Brausetabletten ...
Note
Experiment (lib/oddb/import/gkv.rb#send_flowing_data)
# Debug-instrumented variant: dumps the received text fragment, prints a
# separator of dashes, then appends the fragment to the line buffer.
def send_flowing_data(data)
  p data
  puts "-" * 20
  @current_line << data
end

# Debug-instrumented variant: dumps the buffered line and the columns obtained
# by splitting it on runs of three or more whitespace characters, then records
# the result in @rows and calls reset.
#
# A line is recorded as a column array only when its second column consists of
# six or more digits; every other line is recorded as :doubtful.
def send_line_break
  p @current_line
  puts "*" * 20
  columns = @current_line.strip.split(/\s{3,}/)
  p columns
  puts "=" * 20
  row = :doubtful
  if columns[1].to_s =~ /^\d{6,}$/
    ## ensure consistent row-length, so we can append additional substances
    #  to the tail
    columns[9] ||= nil
    row = columns
  end
  @rows.push row
  reset
end
Run
Result
... "ACC 200" -------------------- " " -------------------- "3867225" -------------------- " " -------------------- "HEXAL AG" -------------------- " " -------------------- "Acetylcystein" -------------------- " " -------------------- "200" -------------------- " " -------------------- "mg" -------------------- " " -------------------- "100 " -------------------- " " -------------------- "St " -------------------- " " -------------------- "Brausetabletten" -------------------- " " -------------------- "15,37" -------------------- "ACC 200 3867225 HEXAL AG Acetylcystein 200 mg 100 St Brausetabletten 15,37" ******************** ["ACC 200", "3867225", "HEXAL AG", "Acetylcystein", "200", "mg", "100", "St", "Brausetabletten", "15,37"] ... "ACC 200" -------------------- " " -------------------- "3867219" -------------------- " " -------------------- "HEXAL AG" -------------------- " " -------------------- "Acetylcystein" -------------------- " " -------------------- "200" -------------------- " " -------------------- "mg" -------------------- " " -------------------- "50 " -------------------- " " -------------------- "St " -------------------- " " -------------------- "Brausetabletten" -------------------- " " -------------------- "12,72" -------------------- 0/16 doubtful 1/16 ACC 600 TABS0434230HEXAL AGAcetylcystein600mg20StTabletten12,16 2/16 ACC 600 TABS0434224HEXAL AGAcetylcystein600mg10StTabletten11,00 3/16 ACC 200 TABS0451145HEXAL AGAcetylcystein200mg100StTabletten15,37 4/16 ACC 200 TABS0451139HEXAL AGAcetylcystein200mg50StTabletten12,72 5/16 ACC 200 TABS0451122HEXAL AGAcetylcystein200mg20StTabletten11,00 6/16 ACC 200 PULVER0253103HEXAL AGAcetylcystein200mg100StPulver15,37 7/16 ACC 200 PULVER0253095HEXAL AGAcetylcystein200mg50StPulver12,72 8/16 ACC 200 PULVER0253089HEXAL AGAcetylcystein200mg20StPulver11,00 9/16 ACC 2004789763HEXAL AGAcetylcystein200mg20StBrausetabletten11,00 10/16 ACC 2003867225HEXAL AGAcetylcystein200mg100StBrausetabletten15,37 11/16 doubtful 
12/16 doubtful 13/16 doubtful 14/16 doubtful 15/16 doubtful
Note
Run
Experiment (lib/oddb/import/gkv.rb#reset)
def reset p 'reset' @current_line = '' end def send_flowing_data(data) p 'send_flowing_data' @current_line << data end def send_line_break p 'send_line_break' ... def send_page p 'send_page' ... def process_page rows p 'process_page' rows.each_with_index do |row, i| print i, "/", rows.length," ", row.to_s, "\n" ...
Result
"reset" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_line_break" "reset" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_line_break" "reset" ... "reset" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_page" "process_page" ... "reset" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_line_break" "reset" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_page" "process_page" ...
Note
Experiment (import_gkv with the old pdf, lib/oddb/import/gkv.rb#download_latest)
def download_latest(url, opts={}, &block) url = '/home/masa/work/zubef2010.12.02.pdf'
Run
Result
"send_flowing_data" "send_flowing_data" "send_flowing_data" "send_flowing_data" "send_line_break" "reset" "send_flowing_data" "send_page" "process_page" 0/16 doubtful 1/16 doubtful 2/16 doubtful 3/16 doubtful 4/16 doubtful 5/16 ACC 2003867219HEXAL AGAcetylcystein200mg50StBrausetabletten12,74 6/16 ACC 2003867225HEXAL AGAcetylcystein200mg100StBrausetabletten15,42 7/16 ACC 2004789763HEXAL AGAcetylcystein200mg20StBrausetabletten11,01 8/16 ACC 200 PULVER0253089HEXAL AGAcetylcystein200mg20StPulver11,01 9/16 ACC 200 PULVER0253095HEXAL AGAcetylcystein200mg50StPulver12,74 10/16 ACC 200 PULVER0253103HEXAL AGAcetylcystein200mg100StPulver15,42 11/16 ACC 200 TABS0451122HEXAL AGAcetylcystein200mg20StTabletten11,01 12/16 ACC 200 TABS0451139HEXAL AGAcetylcystein200mg50StTabletten12,74 13/16 ACC 200 TABS0451145HEXAL AGAcetylcystein200mg100StTabletten15,42 14/16 ACC 600 TABS0434224HEXAL AGAcetylcystein600mg10StTabletten11,01 15/16 ACC 600 TABS0434230HEXAL AGAcetylcystein600mg20StTabletten12,19 ...
Note
Consideration
Question
Experiment (lib/oddb/import/gkv.rb#send_page)
def send_page send_line_break
Run
Result
"reset" "send_flowing_data" "ACC 200" -------------------- "send_flowing_data" " " -------------------- "send_flowing_data" "3867219" -------------------- "send_flowing_data" " " -------------------- "send_flowing_data" "HEXAL AG" -------------------- "send_flowing_data" " " -------------------- "send_flowing_data" "Acetylcystein" -------------------- "send_flowing_data" " " -------------------- "send_flowing_data" "200" -------------------- "send_flowing_data" " " -------------------- "send_flowing_data" "mg" -------------------- "send_flowing_data" " " -------------------- "send_flowing_data" "50 " -------------------- "send_flowing_data" " " -------------------- "send_flowing_data" "St " -------------------- "send_flowing_data" " " -------------------- "send_flowing_data" "Brausetabletten" -------------------- "send_flowing_data" " " -------------------- "send_flowing_data" "12,72" -------------------- "send_line_break" "reset" "send_page" "process_page" 0/17 doubtful 1/17 ACC 600 TABS0434230HEXAL AGAcetylcystein600mg20StTabletten12,16 2/17 ACC 600 TABS0434224HEXAL AGAcetylcystein600mg10StTabletten11,00 3/17 ACC 200 TABS0451145HEXAL AGAcetylcystein200mg100StTabletten15,37 4/17 ACC 200 TABS0451139HEXAL AGAcetylcystein200mg50StTabletten12,72 5/17 ACC 200 TABS0451122HEXAL AGAcetylcystein200mg20StTabletten11,00 6/17 ACC 200 PULVER0253103HEXAL AGAcetylcystein200mg100StPulver15,37 7/17 ACC 200 PULVER0253095HEXAL AGAcetylcystein200mg50StPulver12,72 8/17 ACC 200 PULVER0253089HEXAL AGAcetylcystein200mg20StPulver11,00 9/17 ACC 2004789763HEXAL AGAcetylcystein200mg20StBrausetabletten11,00 10/17 ACC 2003867225HEXAL AGAcetylcystein200mg100StBrausetabletten15,37 11/17 doubtful 12/17 doubtful 13/17 doubtful 14/17 doubtful 15/17 doubtful 16/17 ACC 2003867219HEXAL AGAcetylcystein200mg50StBrausetabletten12,72 "reset"
Note
Experiment (check the zuzahlungsbefreit flag of the package with pzn == 3867219)
Run
Expectation