<< | Index | >>
Consideration
Review (lib/rpdf2txt/text_state.rb#update!)
def update!(rotation=0) orientation = (rotation.to_f.round / 90) % 2 print "orientation= ", orientation, "\n" if orientation == 0 print "@tmxoffset = ", @tmxoffset, "\n" print "@tmx = ", @tmx, "\n" print "@dtmx = ", @dtmx, "\n" print "@tmxscale = ", @tmxscale, "\n" print "@cmxoffset = ", @cmxoffset, "\n" print "x = @tmxoffset + (@tmx + @dtmx) * @tmxscale = ", @tmxoffset + (@tmx + @dtmx) * @tmxscale, "\n" print "@x = x + @cmxoffset = ", @tmxoffset + (@tmx + @dtmx) * @tmxscale + @cmxoffset, "\n" elsif orientation == 1 print "@tmxoffset = ", @tmxoffset, "\n" print "@tmy = ", @tmy, "\n" print "@tmalpha = ", @tmalpha, "\n" print "x = @tmxoffset + @tmy * @tmalpha = ", @tmxoffset + @tmy * @tmalpha, "\n" print "@tmyoffset = ", @tmyoffset, "\n" print "@tmx = ", @tmx, "\n" print "@dtmx = ", @dtmx, "\n" print "@tmbeta = ", @tmbeta, "\n" print "y = @tmyoffset + (@tmx + @dtmx) * @tmbeta = ", @tmyoffset + (@tmx + @dtmx) * @tmbeta, "\n" print "@cmxoffset = ", @cmxoffset, "\n" print "@x = y + @cmxoffset = ", @tmyoffset + (@tmx + @dtmx) * @tmbeta + @cmxoffset, "\n" end
an old pdf
the latest pdf
Note
orientation = (rotation.to_f.round / 90) % 2
test.rb
# Prints, for every rotation from 0 to 360 degrees in steps of 10, the
# rotation and the orientation flag (0 or 1) derived from it, tab-separated.
(0..36).each do |tick|
  rotation = tick * 10
  quarter_turns = rotation.to_f.round / 90
  print rotation, "\t", quarter_turns % 2, "\n"
end
Result
0 0 10 0 20 0 30 0 40 0 50 0 60 0 70 0 80 0 90 1 100 1 110 1 120 1 130 1 140 1 150 1 160 1 170 1 180 0 190 0 200 0 210 0 220 0 230 0 240 0 250 0 260 0 270 1 280 1 290 1 300 1 310 1 320 1 330 1 340 1 350 1
# Stores +snippet_text+ in the shared text state, refreshes the state with
# the current page's :rotate attribute (0 when no current page is set), and
# appends a copy of the state to @snippets.
#
# Returns the copy that was appended.
def _snip(snippet_text)
  @text_state.set_txt(snippet_text)
  rotation = if @current_page
               @current_page.attributes[:rotate]
             else
               0
             end
  @text_state.update!(rotation)
  snapshot = @text_state.dup
  @snippets.push(snapshot)
  snapshot
end
Note
Next
Note
module Rpdf2txt class Text attr_writer :current_page ...
Note
grep
masa@masa ~/ywesee/rpdf2txt $ grep -r current_page lib/
lib/rpdf2txt/object.rb: hr.current_page, hr.text_state = @page, @text_state
lib/rpdf2txt/object.rb: ip.current_page, ip.text_state = @page, @text_state
lib/rpdf2txt/object.rb: ip.current_page, ip.text_state = @page, @text_state
lib/rpdf2txt/object.rb: text.current_page = page
lib/rpdf2txt/text.rb: attr_writer :current_page
lib/rpdf2txt/text.rb: return nil unless @current_page
lib/rpdf2txt/text.rb: @current_page.font(font_name.to_s.downcase.intern)
lib/rpdf2txt/text.rb: @text_state.update!(@current_page ? @current_page.attributes[:rotate] : 0)
lib/rpdf2txt/text_state.rb: attr_accessor :current_page
lib/rpdf2txt/text_state.rb: @xobject ||= @current_page.resources.xobject(@resource)
lib/rpdf2txt/object.rb#extract_text_objects
def extract_text_objects(page, text_state) @page, @text_state = page, text_state ... text.current_page = page
def text(callback_handler) ... text_snippets = concat_stream.extract_text_objects(self, @text_state)
lib/rpdf2txt/parser.rb#build_object
def build_object(src) case src when /\/Type\s*\/Catalog\b/n CatalogNode.new(src, @target_encoding) when /\/Type\s*\/Pages\b/n PageNode.new(src, @target_encoding) when /\/Type\s*\/Page\b/n PageLeaf.new(src, @target_encoding) ...
Experiment
def build_object(src) case src when /\/Type\s*\/Catalog\b/n CatalogNode.new(src, @target_encoding) when /\/Type\s*\/Pages\b/n PageNode.new(src, @target_encoding) when /\/Type\s*\/Page\b/n print "PageLeaf.attributes[:rotate] = ", PageLeaf.new(src, @target_encoding).attributes[:rotate], "\n" PageLeaf.new(src, @target_encoding)
Result
masa@masa ~/ywesee/rpdf2txt $ ruby -I lib bin/rpdf2txt zubef_old.pdf PageLeaf.attributes[:rotate] = 90 PageLeaf.attributes[:rotate] = 90 PageLeaf.attributes[:rotate] = 90 PageLeaf.attributes[:rotate] = 90 PageLeaf.attributes[:rotate] = 90 PageLeaf.attributes[:rotate] = 90 ...
masa@masa ~/ywesee/rpdf2txt $ ruby -I lib bin/rpdf2txt zubef_latest.pdf PageLeaf.attributes[:rotate] = nil PageLeaf.attributes[:rotate] = nil PageLeaf.attributes[:rotate] = nil PageLeaf.attributes[:rotate] = nil PageLeaf.attributes[:rotate] = nil PageLeaf.attributes[:rotate] = nil ...
Reference
Calculation
Experiment (lib/rpdf2txt/text.rb#scan)
# Debugging variant of scan: parses @src, dumps the resulting tree and then
# calls exit, so scan_tree is never reached in this experiment.
#
# Kernel#exit raises SystemExit, which the `rescue Exception` clause below
# also catches; @src is therefore printed before the exception is re-raised.
def scan
  begin
    p("getin scan")
    @snippets = []
    parsed = Rpdf2txt.text_parser.parse(@src)
    p(parsed)
    puts
    exit
    scan_tree(parsed)
    @snippets
  rescue Exception
    # Show the offending source, then let the original exception propagate.
    puts
    puts(@src)
    raise
  end
end
Result
masa@masa ~/ywesee/rpdf2txt $ ruby -I lib bin/rpdf2txt zubef_old.pdf "getin text" "getin extract_text_objects" "getin scan" Target:["BT", _ArrayNode:[ Tf:["/","TT2","1","Tf"], Tm:["0","14.0053","-13.9999","0","59.64","43.4305","Tm"], UElement:["0","g"], Tc:["-.0002","Tc"], TW:[".0008","Tw"], Tj:["(Zuzahlungsbefreite Arzneimittel nach \247 31 Abs. 3 Satz 4 SGB V)","Tj"], Tf:["/","TT4","1","Tf"], Tm:["0","9.0035","-9","0","117","176.6305","Tm"], Tc:[".0009","Tc"], TW:["0","Tw"], Tj:["(PZN)","Tj"], TD:["-14.7942","0","TD"], Tc:["-.0016","Tc"], Array:["[", _ArrayNode:[ TJSingleElement:["(Arzneimit)"], TJSingleElement:["-3.7"], TJSingleElement:["(t)"], TJSingleElement:["-3.7"], TJSingleElement:["(e)"], TJSingleElement:["1.4"], TJSingleElement:["(lname)"] ], "]TJ"], TD:["59.8165","0","TD"], Array:["[", _ArrayNode:[ TJSingleElement:["(Darreichungsf)"], TJSingleElement:["-3.7"], TJSingleElement:["(o)"], TJSingleElement:["1.4"], TJSingleElement:["(rm)"] ], "]TJ"], TD:["-39.9843","0","TD"], Tj:["(Hersteller)","Tj"], TD:["52.2661","0","TD"], Tc:[".0001","Tc"], Array:["[", _ArrayNode:[ TJSingleElement:["(Apo)"], TJSingleElement:["9.8"], TJSingleElement:["(t)"], TJSingleElement:["-8.6"], TJSingleElement:["(h)"], TJSingleElement:["9.8"], TJSingleElement:["(e)"], TJSingleElement:["-3.5"], TJSingleElement:["(ke)"], TJSingleElement:["9.8"], TJSingleElement:["(nverka)"], TJSingleElement:["9.8"], TJSingleElement:["(ufspre)"], TJSingleElement:["9.8"], TJSingleElement:["(is)"] ], "]TJ"], TD:["3.1321","-1.14","TD"], Tc:["-.0006","Tc"], TW:[".0027","Tw"], Array:["[", _ArrayNode:[ TJSingleElement:["( in)"], TJSingleElement:["-4.2"], TJSingleElement:["(kl)"], TJSingleElement:["8.3"], TJSingleElement:["(.)"], TJSingleElement:["-9.3"], TJSingleElement:["(M)"], TJSingleElement:["-.6"], TJSingleElement:["(w)"], TJSingleElement:["21.8"], TJSingleElement:["(S)"], TJSingleElement:["0"], TJSingleElement:["(t)"] ], "]TJ"] ], "ET"] BT /TT2 1 Tf 0 14.0053 -13.9999 0 59.64 43.4305 Tm 0 g -.0002 Tc 
.0008 Tw (Zuzahlungsbefreite Arzneimittel nach � 31 Abs. 3 Satz 4 SGB V)Tj /TT4 1 Tf 0 9.0035 -9 0 117 176.6305 Tm .0009 Tc 0 Tw (PZN)Tj -14.7942 0 TD -.0016 Tc [(Arzneimit)-3.7(t)-3.7(e)1.4(lname)]TJ 59.8165 0 TD [(Darreichungsf)-3.7(o)1.4(rm)]TJ -39.9843 0 TD (Hersteller)Tj 52.2661 0 TD .0001 Tc [(Apo)9.8(t)-8.6(h)9.8(e)-3.5(ke)9.8(nverka)9.8(ufspre)9.8(is)]TJ 3.1321 -1.14 TD -.0006 Tc .0027 Tw [( in)-4.2(kl)8.3(.)-9.3(M)-.6(w)21.8(S)0(t)]TJ ET
"getin text" "getin extract_text_objects" "getin scan" Target:["BT", _ArrayNode:[ Tf:["/","F1","13.92","Tf"], TD:["43.44","535.68","TD"], Tc:["-0.10512","Tc"], Tj:["(Zu)","Tj"], Tc:["0","Tc"], Tj:["(z)","Tj"], Tc:["-0.05952","Tc"], Tj:["(a)","Tj"], Tc:["-0.10512","Tc"], Tj:["(h)","Tj"], Tc:["-0.02976","Tc"], Tj:["(l)","Tj"], Tc:["-0.10512","Tc"], Tj:["(ung)","Tj"], Tc:["-0.05952","Tc"], Tj:["(s)","Tj"], Tc:["-0.10512","Tc"], Tj:["(b)","Tj"], Tc:["-0.05952","Tc"], Tj:["(ef)","Tj"], Tc:["0.10512","Tc"], Tj:["(r)","Tj"], Tc:["-0.05952","Tc"], Tj:["(e)","Tj"], Tc:["-0.02976","Tc"], Tj:["(i)","Tj"], Tc:["-0.07536","Tc"], Tj:["(te)","Tj"], Tc:["-0.02976","Tc"], Tj:["( )","Tj"], Tc:["-0.21024","Tc"], Tj:["(A)","Tj"], Tc:["0.10512","Tc"], Tj:["(r)","Tj"], Tc:["0","Tc"], Tj:["(z)","Tj"], Tc:["-0.10512","Tc"], Tj:["(n)","Tj"], Tc:["-0.05952","Tc"], Tj:["(e)","Tj"], Tc:["-0.02976","Tc"], Tj:["(i)","Tj"], Tc:["0.10512","Tc"], Tj:["(m)","Tj"], Tc:["-0.02976","Tc"], Tj:["(i)","Tj"], Tc:["-0.07536","Tc"], Tj:["(tte)","Tj"], Tc:["-0.02976","Tc"], Tj:["(l )","Tj"], Tc:["-0.10512","Tc"], Tj:["(n)","Tj"], Tc:["-0.05952","Tc"], Tj:["(ac)","Tj"], Tc:["-0.10512","Tc"], Tj:["(h)","Tj"], Tc:["-0.02976","Tc"], Tj:["( )","Tj"], Tc:["-0.05952","Tc"], Tj:["(\247)","Tj"], Tc:["-0.02976","Tc"], Tj:["( )","Tj"], Tc:["-0.05952","Tc"], Tj:["(31)","Tj"], Tc:["-0.02976","Tc"], Tj:["( )","Tj"], Tc:["-0.21024","Tc"], Tj:["(A)","Tj"], Tc:["-0.10512","Tc"], Tj:["(b)","Tj"], Tc:["-0.05952","Tc"], Tj:["(s)","Tj"], Tc:["-0.02976","Tc"], Tj:["(. 
)","Tj"], Tc:["-0.05952","Tc"], Tj:["(3)","Tj"], Tc:["-0.02976","Tc"], Tj:["( )","Tj"], Tc:["0.07536","Tc"], Tj:["(S)","Tj"], Tc:["-0.05952","Tc"], Tj:["(at)","Tj"], Tc:["0","Tc"], Tj:["(z)","Tj"], Tc:["-0.02976","Tc"], Tj:["( )","Tj"], Tc:["-0.05952","Tc"], Tj:["(4)","Tj"], Tc:["-0.02976","Tc"], Tj:["( )","Tj"], Tc:["0.07536","Tc"], Tj:["(S)","Tj"], Tc:["-0.02976","Tc"], Tj:["(G)","Tj"], Tc:["0.02976","Tc"], Tj:["(B)","Tj"], Tc:["-0.02976","Tc"], Tj:["( V)","Tj"], Tf:["/","F2","8.88","Tf"] ], "ET"] BT /F1 13.92 Tf 43.44 535.68 TD -0.10512 Tc (Zu) Tj 0 Tc (z) Tj -0.05952 Tc (a) Tj -0.10512 Tc (h) Tj -0.02976 Tc (l) Tj -0.10512 Tc (ung) Tj -0.05952 Tc (s) Tj -0.10512 Tc (b) Tj -0.05952 Tc (ef) Tj 0.10512 Tc (r) Tj -0.05952 Tc (e) Tj -0.02976 Tc (i) Tj -0.07536 Tc (te) Tj -0.02976 Tc ( ) Tj -0.21024 Tc (A) Tj 0.10512 Tc (r) Tj 0 Tc (z) Tj -0.10512 Tc (n) Tj -0.05952 Tc (e) Tj -0.02976 Tc (i) Tj 0.10512 Tc (m) Tj -0.02976 Tc (i) Tj -0.07536 Tc (tte) Tj -0.02976 Tc (l ) Tj -0.10512 Tc (n) Tj -0.05952 Tc (ac) Tj -0.10512 Tc (h) Tj -0.02976 Tc ( ) Tj -0.05952 Tc (\247) Tj -0.02976 Tc ( ) Tj -0.05952 Tc (31) Tj -0.02976 Tc ( ) Tj -0.21024 Tc (A) Tj -0.10512 Tc (b) Tj -0.05952 Tc (s) Tj -0.02976 Tc (. ) Tj -0.05952 Tc (3) Tj -0.02976 Tc ( ) Tj 0.07536 Tc (S) Tj -0.05952 Tc (at) Tj 0 Tc (z) Tj -0.02976 Tc ( ) Tj -0.05952 Tc (4) Tj -0.02976 Tc ( ) Tj 0.07536 Tc (S) Tj -0.02976 Tc (G) Tj 0.02976 Tc (B) Tj -0.02976 Tc ( V) Tj /F2 8.88 Tf ET
# Debugging variant of scan: appends the raw @src, followed by a newline, to
# /home/masa/work/test.dat before walking the parsed tree.
#
# Returns @snippets. On any exception, prints a blank line and @src, then
# re-raises.
def scan
  begin
    p("getin scan")
    @snippets = []
    parsed = Rpdf2txt.text_parser.parse(@src)
    File.open("/home/masa/work/test.dat", "a") { |dump| dump.print(@src, "\n") }
    scan_tree(parsed)
    @snippets
  rescue Exception
    # Show the offending source, then let the original exception propagate.
    puts
    puts(@src)
    raise
  end
end
... BT 0 8.003 -7.9999 0 178.32 176.6305 Tm -.0014 Tc (3867225)Tj -16.6438 0 TD -.0016 Tc .0012 Tw (ACC 200)Tj 67.3249 0 TD -.0008 Tc 0 Tw [(Brausetabletten)-11552.6(15,42)]TJ -45.0132 0 TD -.0003 Tc -.0001 Tw (HEXAL AG)Tj 37.6885 0 TD -.0014 Tc 0 Tw [(100)-1379.5(S)-9.2(t)]TJ -22.4541 0 TD -.0023 Tc [(Ace)-8.4(t)5.6(y)10.4(l)-5(c)-12.1(y)10.4(st)-9.4(e)6.6(i)-5(n)-8645.2(200)-878.1(mg)]TJ ET BT 0 8.003 -7.9999 0 212.34 176.6305 Tm -.0014 Tc (4789763)Tj -16.6438 0 TD -.0016 Tc .0012 Tw (ACC 200)Tj 67.3249 0 TD -.0008 Tc 0 Tw [(Brausetabletten)-11552.6(11,01)]TJ -45.0132 0 TD -.0003 Tc -.0001 Tw (HEXAL AG)Tj 38.2433 0 TD -.0014 Tc 0 Tw [(20)-1379.5(S)-9.2(t)]TJ -23.0089 0 TD -.0023 Tc [(Ace)-8.4(t)5.6(y)10.4(l)-5(c)-12.1(y)10.4(st)-9.4(e)6.6(i)-5(n)-8645.2(200)-878.1(mg)]TJ ...
... BT /F2 7.92 Tf 1 0 0 1 0 0 Tm 176.64 450.48 TD -0.08352 Tc (3867219) Tj -133.2 0 TD 0 Tc (A) Tj 0.04176 Tc (CC) Tj -0.04176 Tc ( ) Tj -0.08352 Tc (200) Tj 538.8 0 TD 0 Tc (Br) Tj -0.32352 Tc (a) Tj -0.08352 Tc (u) Tj 0.12 Tc (s) Tj -0.32352 Tc (e) Tj -0.04176 Tc (t) Tj -0.32352 Tc (a) Tj -0.08352 Tc (bl) Tj -0.32352 Tc (e) Tj -0.04176 Tc (tt) Tj -0.32352 Tc (en) Tj 148.32 0 TD -0.08352 Tc (12) Tj -0.04176 Tc (,) Tj -0.08352 Tc (72) Tj -508.56 0 TD 0.04176 Tc (H) Tj 0 Tc (E) Tj -0.24264 Tc (X) Tj 0 Tc (A) Tj -0.08352 Tc (L) Tj -0.04176 Tc ( ) Tj 0 Tc (AG) Tj 294.48 0 TD -0.08352 Tc (50) Tj -0.04176 Tc ( ) Tj 31.44 0 TD 0.23736 Tc (S) Tj -0.04176 Tc (t) Tj -0.28176 Tc ( ) Tj -0.04176 Tc ( ) Tj 0.19824 Tc ( ) Tj -0.04176 Tc ( ) Tj -204 0 TD 0 Tc (A) Tj 0.12 Tc (c) Tj -0.32352 Tc (e) Tj -0.04176 Tc (t) Tj -0.36 Tc (y) Tj -0.07824 Tc (l) Tj 0.12 Tc (c) Tj -0.36 Tc (y) Tj 0.12 Tc (s) Tj -0.04176 Tc (t) Tj -0.32352 Tc (e) Tj -0.07824 Tc (in) Tj 115.68 0 TD (200) Tj 20.4 0 TD -0.11736 Tc (mg) Tj ET BT 176.64 416.64 TD -0.08352 Tc (3867225) Tj -133.2 0 TD 0 Tc (A) Tj 0.04176 Tc (CC) Tj -0.04176 Tc ( ) Tj -0.08352 Tc (200) Tj 538.8 0 TD 0 Tc (Br) Tj -0.32352 Tc (a) Tj -0.08352 Tc (u) Tj 0.12 Tc (s) Tj -0.32352 Tc (e) Tj -0.04176 Tc (t) Tj -0.32352 Tc (a) Tj -0.08352 Tc (bl) Tj -0.32352 Tc (e) Tj -0.04176 Tc (tt) Tj -0.32352 Tc (en) Tj 148.32 0 TD -0.08352 Tc (15) Tj -0.04176 Tc (,) Tj -0.08352 Tc (37) Tj -508.56 0 TD 0.04176 Tc (H) Tj 0 Tc (E) Tj -0.24264 Tc (X) Tj 0 Tc (A) Tj -0.08352 Tc (L) Tj -0.04176 Tc ( ) Tj 0 Tc (AG) Tj 292.32 0 TD -0.08352 Tc (100) Tj -0.04176 Tc ( ) Tj 33.6 0 TD 0.23736 Tc (S) Tj -0.04176 Tc (t) Tj -0.28176 Tc ( ) Tj -0.04176 Tc ( ) Tj 0.19824 Tc ( ) Tj -0.04176 Tc ( ) Tj -204 0 TD 0 Tc (A) Tj 0.12 Tc (c) Tj -0.32352 Tc (e) Tj -0.04176 Tc (t) Tj -0.36 Tc (y) Tj -0.07824 Tc (l) Tj 0.12 Tc (c) Tj -0.36 Tc (y) Tj 0.12 Tc (s) Tj -0.04176 Tc (t) Tj -0.32352 Tc (e) Tj -0.07824 Tc (in) Tj 115.68 0 TD (200) Tj 20.4 0 TD -0.11736 Tc (mg) Tj ET ...
Consideration
Hypothesis
Experiment (lib/rpdf2txt/object.rb#text)
def text(callback_handler) p "getin text" concat_stream = Stream.new('') if(@contents.size == 1 && @contents.first.is_a?(ReferenceArray)) @contents.first.build_stream(concat_stream) else @contents.each { |stream| concat_stream.append(stream.decoded_stream) } end @text_state.media_box = self.media_box # here @x and @y of TextState instance are set text_snippets = concat_stream.extract_text_objects(self, @text_state)new_text_snippets = []
last = nil
snippet = nil
text_snippets.each do |snip|
snippet ||= snip.txt
if last
if last == snip
snippet << snip.txt
else
last.set_txt(snippet)
new_text_snippets << last.dup
snippet = snip.txt
last = snip
end
end
last = snip
end
# for last element
last.txt = snippet
new_text_snippets << last.dup
text_snippets = new_text_snippets
...
Result
masa@masa ~/ywesee/rpdf2txt $ ruby -I lib bin/rpdf2txt zubef_latest.pdf ... Seite 1 von 638 ACC 600 TABS 0434230 HEXAL AG Acetylcystein 600 mg 20 St Tabletten 12,16 ACC 600 TABS 0434224 HEXAL AG Acetylcystein 600 mg 10 St Tabletten 11,00 ACC 200 TABS 0451145 HEXAL AG Acetylcystein 200 mg 100 St Tabletten 15,37 ... ACC 200 3867225 HEXAL AG Acetylcystein 200 mg 100 St Brausetabletten 15,37 Zuzahlungsbefreite Arzneimittel nach § 31 Abs. 3 Satz 4 SGB V Produktstand 01 . 05 . 2011 sortiert nach Arzneimittelname Arzneimittelname PZN Hersteller Wirkstoff(e) Wirkstärke(n) Packungs- Darreichungsform Apothekenverkaufspreis größe inkl.MwSt ACC 200 3867219 HEXAL AG Acetylcystein 200 mg 50 St Brausetabletten 12,72
Note
Consideration
Experiment (lib/rpdf2txt/object.rb#merge_snippets)
# Merges each run of consecutive snippets that compare equal (==) into one
# snippet whose text is the concatenation of the texts in that run.
#
# text_snippets:: snippets in stream order. Each must respond to +txt+,
#                 +txt=+, +dup+ and +==+ (assumes +txt+ returns a String).
#
# Returns a new Array with one entry per run: a dup of the run's last
# snippet carrying the joined text. An empty input yields an empty Array.
# Neither the input snippets nor their text strings are modified.
def merge_snippets(text_snippets)
  runs = [] # each entry: [most recent snippet of the run, accumulated text]
  text_snippets.each do |snip|
    current = runs.last
    if current && current[0] == snip
      # Same run as the previous snippet: extend the text, advance the tail.
      current[0] = snip
      current[1] << snip.txt
    else
      # Copy the text so appending never alters the caller's string.
      runs << [snip, snip.txt.dup]
    end
  end
  runs.map do |run_end, text|
    merged = run_end.dup
    merged.txt = text
    merged
  end
end
def text(callback_handler)
concat_stream = Stream.new('')
if(@contents.size == 1 && @contents.first.is_a?(ReferenceArray))
@contents.first.build_stream(concat_stream)
else
@contents.each { |stream|
concat_stream.append(stream.decoded_stream)
}
end
@text_state.media_box = self.media_box
# here @x and @y of TextState instance are set
text_snippets = concat_stream.extract_text_objects(self, @text_state)
text_snippets = merge_snippets(text_snippets)
Result
masa@masa ~/ywesee/rpdf2txt $ ruby -I lib bin/rpdf2txt zubef_latest.pdf ACC 600 TABS 0434230 HEXAL AG Acetylcystein 600 mg 20 St Tabletten 12,16 ACC 600 TABS 0434224 HEXAL AG Acetylcystein 600 mg 10 St Tabletten 11,00 ... ACC 200 3867225 HEXAL AG Acetylcystein 200 mg 100 St Brausetabletten 15,37 Zuzahlungsbefreite Arzneimittel nach § 31 Abs. 3 Satz 4 SGB V Produktstand 01 . 05 . 2011 sortiert nach Arzneimittelname Arzneimittelname PZN Hersteller Wirkstoff(e) Wirkstärke(n) Packungs- Darreichungsform Apothekenverkaufspreis größe inkl.MwSt ACC 200 3867219 HEXAL AG Acetylcystein 200 mg 50 St Brausetabletten 12,72
Note
Next
Experiment (lib/oddb/import/gkv.rb#process_page)
# Debugging variant of process_page: prints each row with +p+ before passing
# it to import_row.
#
# Returns +rows+ (the receiver of +each+).
def process_page(rows)
  rows.each { |entry|
    p(entry)
    import_row(entry)
  }
end
Run
ruby -I lib bin/oddb
ruby -I lib jobs/import_gkv
Result
./lib/oddb/html/view/drugs/package.rb:373: warning: parenthesize argument(s) for future version :doubtful ["600CCAABST", "0434230", "XAGLEHA", "teinAscytylce", "600", "mg", "20", "t S", "Tableentt", "1216,"] ODBA::Stub was unable to replace Hash#4265514 from ODDB::Drugs::Package:#34976 ODBA::Stub was unable to replace Array#4265513 from ODDB::Drugs::Package:#34976 ODBA::Stub was unable to replace Array#4265515 from ODDB::Drugs::Package:#34976 ["ACC600ABST", "0434224", "XHELAAG", "cetsyineyAtcl", "600", "mg", "10", "St", "Tablenett", "11,00"] ODBA::Stub was unable to replace Hash#4265517 from ODDB::Drugs::Package:#34973 ODBA::Stub was unable to replace Array#4265516 from ODDB::Drugs::Package:#34973 ODBA::Stub was unable to replace Array#4265518 from ODDB::Drugs::Package:#34973 ["AABSCC200T", "0451145", "AGEHALX", "tsyceintyAcel", "200", "mg", "100", "St", "Tablenett", "15,37"] ODBA::Stub was unable to replace Hash#4265520 from ODDB::Drugs::Package:#35434 ODBA::Stub was unable to replace Array#4265519 from ODDB::Drugs::Package:#35434 ODBA::Stub was unable to replace Array#4265521 from ODDB::Drugs::Package:#35434 ["CC200TABSA", "0451139", "AHEXLAG", "ycinetstyAcel", "200", "mg", "50", "tS", "Tblettaen", "7212,"] ODBA::Stub was unable to replace Hash#4265523 from ODDB::Drugs::Package:#35431 ODBA::Stub was unable to replace Array#4265522 from ODDB::Drugs::Package:#35431 ODBA::Stub was unable to replace Array#4265524 from ODDB::Drugs::Package:#35431 ["AABSTCC200", "0451122", "EHXALAG", "tysceinytAcel", "200", "mg", "20", "St", "Tblettena", "0011,"] ODBA::Stub was unable to replace Hash#4265526 from ODDB::Drugs::Package:#35428 ODBA::Stub was unable to replace Array#4265525 from ODDB::Drugs::Package:#35428 ODBA::Stub was unable to replace Array#4265527 from ODDB::Drugs::Package:#35428 ["200CCAPUVERL", "0253103", "AHXELAG", "ytlinetseAccy", "200", "mg", "100", "tS", "vPuler", "37,15"] ...
Run with the updated rpdf2txt
ruby -I ../rpdf2txt/lib:lib bin/oddb
ruby -I ../rpdf2txt/lib:lib jobs/import_gkv
Result
:doubtful ["600CCAABST", "0434230", "XAGLEHA", "teinAscytylce", "600", "mg", "20", "t S", "Tableentt", "1216,"] ODBA::Stub was unable to replace Hash#4265514 from ODDB::Drugs::Package:#34976 ODBA::Stub was unable to replace Array#4265513 from ODDB::Drugs::Package:#34976 ODBA::Stub was unable to replace Array#4265515 from ODDB::Drugs::Package:#34976 ["ACC600ABST", "0434224", "XHELAAG", "cetsyineyAtcl", "600", "mg", "10", "St", "Tablenett", "11,00"] ODBA::Stub was unable to replace Hash#4265517 from ODDB::Drugs::Package:#34973 ODBA::Stub was unable to replace Array#4265516 from ODDB::Drugs::Package:#34973 ODBA::Stub was unable to replace Array#4265518 from ODDB::Drugs::Package:#34973 ["AABSCC200T", "0451145", "AGEHALX", "tsyceintyAcel", "200", "mg", "100", "St", "Tablenett", "15,37"] ODBA::Stub was unable to replace Hash#4265520 from ODDB::Drugs::Package:#35434 ODBA::Stub was unable to replace Array#4265519 from ODDB::Drugs::Package:#35434 ODBA::Stub was unable to replace Array#4265521 from ODDB::Drugs::Package:#35434 ["CC200TABSA", "0451139", "AHEXLAG", "ycinetstyAcel", "200", "mg", "50", "tS", "Tblettaen", "7212,"] ODBA::Stub was unable to replace Hash#4265523 from ODDB::Drugs::Package:#35431 ODBA::Stub was unable to replace Array#4265522 from ODDB::Drugs::Package:#35431 ODBA::Stub was unable to replace Array#4265524 from ODDB::Drugs::Package:#35431 ["AABSTCC200", "0451122", "EHXALAG", "tysceinytAcel", "200", "mg", "20", "St", "Tblettena", "0011,"] ODBA::Stub was unable to replace Hash#4265526 from ODDB::Drugs::Package:#35428 ODBA::Stub was unable to replace Array#4265525 from ODDB::Drugs::Package:#35428 ODBA::Stub was unable to replace Array#4265527 from ODDB::Drugs::Package:#35428 ...
Note
Install the updated rpdf2txt
masa@masa ~/ywesee/rpdf2txt $ ruby install.rb config masa@masa ~/ywesee/rpdf2txt $ ruby install.rb setup masa@masa ~/ywesee/rpdf2txt $ sudo ruby install.rb install
Run again
ruby -I lib bin/oddb
ruby -I lib jobs/import_gkv
Result
:doubtful ["ACC 600 TABS", "0434230", "HEXAL AG", "Acetylcystein", "600", "mg", "20", "St", "Tabletten", "12,16"] ODBA::Stub was unable to replace Hash#4265514 from ODDB::Drugs::Package:#34976 ODBA::Stub was unable to replace Array#4265513 from ODDB::Drugs::Package:#34976 ODBA::Stub was unable to replace Array#4265515 from ODDB::Drugs::Package:#34976 ["ACC 600 TABS", "0434224", "HEXAL AG", "Acetylcystein", "600", "mg", "10", "St", "Tabletten", "11,00"] ODBA::Stub was unable to replace Hash#4265517 from ODDB::Drugs::Package:#34973 ODBA::Stub was unable to replace Array#4265516 from ODDB::Drugs::Package:#34973 ODBA::Stub was unable to replace Array#4265518 from ODDB::Drugs::Package:#34973 ["ACC 200 TABS", "0451145", "HEXAL AG", "Acetylcystein", "200", "mg", "100", "St", "Tabletten", "15,37"] ODBA::Stub was unable to replace Hash#4265520 from ODDB::Drugs::Package:#35434 ODBA::Stub was unable to replace Array#4265519 from ODDB::Drugs::Package:#35434 ODBA::Stub was unable to replace Array#4265521 from ODDB::Drugs::Package:#35434 ["ACC 200 TABS", "0451139", "HEXAL AG", "Acetylcystein", "200", "mg", "50", "St", "Tabletten", "12,72"] ODBA::Stub was unable to replace Hash#4265523 from ODDB::Drugs::Package:#35431 ODBA::Stub was unable to replace Array#4265522 from ODDB::Drugs::Package:#35431 ODBA::Stub was unable to replace Array#4265524 from ODDB::Drugs::Package:#35431 ["ACC 200 TABS", "0451122", "HEXAL AG", "Acetylcystein", "200", "mg", "20", "St", "Tabletten", "11,00"] ODBA::Stub was unable to replace Hash#4265526 from ODDB::Drugs::Package:#35428 ODBA::Stub was unable to replace Array#4265525 from ODDB::Drugs::Package:#35428 ODBA::Stub was unable to replace Array#4265527 from ODDB::Drugs::Package:#35428 ["ACC 200 PULVER", "0253103", "HEXAL AG", "Acetylcystein", "200", "mg", "100", "St", "Pulver", "15,37"] ODBA::Stub was unable to replace Hash#4265529 from ODDB::Drugs::Package:#29672 ODBA::Stub was unable to replace Array#4265528 from ODDB::Drugs::Package:#29672 ODBA::Stub 
was unable to replace Array#4265530 from ODDB::Drugs::Package:#29672 ["ACC 200 PULVER", "0253095", "HEXAL AG", "Acetylcystein", "200", "mg", "50", "St", "Pulver", "12,72"] ODBA::Stub was unable to replace Hash#4265532 from ODDB::Drugs::Package:#29669 ODBA::Stub was unable to replace Array#4265531 from ODDB::Drugs::Package:#29669 ODBA::Stub was unable to replace Array#4265533 from ODDB::Drugs::Package:#29669 ["ACC 200 PULVER", "0253089", "HEXAL AG", "Acetylcystein", "200", "mg", "20", "St", "Pulver", "11,00"] ODBA::Stub was unable to replace Hash#4265535 from ODDB::Drugs::Package:#29666 ODBA::Stub was unable to replace Array#4265534 from ODDB::Drugs::Package:#29666 ODBA::Stub was unable to replace Array#4265536 from ODDB::Drugs::Package:#29666 ["ACC 200", "4789763", "HEXAL AG", "Acetylcystein", "200", "mg", "20", "St", "Brausetabletten", "11,00"] ODBA::Stub was unable to replace Hash#4265538 from ODDB::Drugs::Package:#164212 ODBA::Stub was unable to replace Array#4265537 from ODDB::Drugs::Package:#164212 ODBA::Stub was unable to replace Array#4265539 from ODDB::Drugs::Package:#164212 ["ACC 200", "3867225", "HEXAL AG", "Acetylcystein", "200", "mg", "100", "St", "Brausetabletten", "15,37"] ODBA::Stub was unable to replace Hash#4265541 from ODDB::Drugs::Package:#137633 ODBA::Stub was unable to replace Array#4265540 from ODDB::Drugs::Package:#137633 ODBA::Stub was unable to replace Array#4265542 from ODDB::Drugs::Package:#137633 :doubtful :doubtful :doubtful :doubtful :doubtful :doubtful ["ACEMUC 200", "3711606", "betapharm Arzneimittel GmbH", "Acetylcystein", "200", "mg", "100", "St", "Brausetabletten", "15,37"] ODBA::Stub was unable to replace Hash#4265544 from ODDB::Drugs::Package:#132406 ODBA::Stub was unable to replace Array#4265543 from ODDB::Drugs::Package:#132406 ODBA::Stub was unable to replace Array#4265545 from ODDB::Drugs::Package:#132406 ...
Note
Commit