diff --git a/Html2Docx.gemspec b/Html2Docx.gemspec index 27c57e8..9b87bbe 100644 --- a/Html2Docx.gemspec +++ b/Html2Docx.gemspec @@ -25,4 +25,5 @@ Gem::Specification.new do |spec| spec.add_development_dependency 'rspec', '~> 3.0' spec.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.8' spec.add_runtime_dependency 'rubyzip', '~> 1.2', '>= 1.2.0' + spec.add_runtime_dependency 'typhoeus', '~> 1.0', '>= 1.0.1' end diff --git a/README.md b/README.md index 76b1e6e..fbb17a8 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ Html2Docx has very easy usage. * Supporting heading. [Wiki - Heading Usage](https://github.com/MuhammetDilmac/Html2Docx/wiki/Heading-Usage) * Supporting internal links. [Wiki - Internal Links Usage](https://github.com/MuhammetDilmac/Html2Docx/wiki/Internal-Links-Usage) * Supporting extarnal links. [Wiki - Extarnal Links Usage](https://github.com/MuhammetDilmac/Html2Docx/wiki/External-Links-Usage) +* Supporting image(internal & external). [Wiki - Image Usage](https://github.com/MuhammetDilmac/Html2Docx/wiki/Image-Usage) ## Contributing diff --git a/lib/Html2Docx.rb b/lib/Html2Docx.rb index f683130..f128227 100644 --- a/lib/Html2Docx.rb +++ b/lib/Html2Docx.rb @@ -1,6 +1,7 @@ require 'fileutils' require 'nokogiri' require 'zip' +require 'typhoeus' require 'Html2Docx/helpers/document_helper' require 'Html2Docx/helpers/nokogiri_helper' @@ -14,6 +15,7 @@ require 'Html2Docx/document_objects/paragraph' require 'Html2Docx/document_objects/heading' +require 'Html2Docx/document_objects/image' module Html2Docx ROOT_PATH = File.expand_path(File.join(File.dirname(__FILE__), '../')) diff --git a/lib/Html2Docx/document.rb b/lib/Html2Docx/document.rb index aae42f1..705ec7c 100644 --- a/lib/Html2Docx/document.rb +++ b/lib/Html2Docx/document.rb @@ -1,7 +1,8 @@ module Html2Docx class Document def initialize(options = {}) - @document_file = File.join(options.fetch(:temp), 'word', 'document2.xml') + @tmp_path = options[:temp] + @document_file = File.join(@tmp_path, 'word', 'document2.xml') @document = File.open(@document_file) { |f| Nokogiri::XML(f) } @body = @document.at_xpath('//w:body') @contents = [] @@ -26,11 +27,11 @@ def add_html(html) case element.name when 'p' # Add paragraph - paragraph = DocumentObjects::Paragraph.new(@document, @relation) + paragraph = DocumentObjects::Paragraph.new(@document, @relation, @tmp_path) paragraph.add_paragraph(element) @contents.push paragraph.render when /h[1-9]/ - heading = DocumentObjects::Heading.new(@document, @relation) + heading = DocumentObjects::Heading.new(@document, @relation, @tmp_path) heading.add_heading(element) @contents.push heading.render when 'table' diff --git a/lib/Html2Docx/document_objects/heading.rb b/lib/Html2Docx/document_objects/heading.rb index 7a4d1f9..4374bd3 100644 --- a/lib/Html2Docx/document_objects/heading.rb +++ b/lib/Html2Docx/document_objects/heading.rb @@ -1,9 +1,11 @@ module Html2Docx module DocumentObjects class Heading - def initialize(document, relation) + def initialize(document, relation, tmp_path) @document = document @relation = relation + @tmp_path = tmp_path + @heading = nil end @@ -11,7 +13,7 @@ def add_heading(heading_object) heading_object['class'] = "Heading#{heading_object.name.scan(/[0-9]/).first}" heading_object.name = 'p' - paragraph = Paragraph.new(@document, @relation) + paragraph = Paragraph.new(@document, @relation, @tmp_path) paragraph.add_paragraph(heading_object) @heading = paragraph.render diff --git a/lib/Html2Docx/document_objects/image.rb b/lib/Html2Docx/document_objects/image.rb new file mode 100644 index 0000000..655651d --- /dev/null +++ b/lib/Html2Docx/document_objects/image.rb @@ -0,0 +1,243 @@ +module Html2Docx + module DocumentObjects + class Image + def initialize(document, relation, tmp_path) + @document = document + @relation = relation + @tmp_path = tmp_path + + @media_path = nil + @image = nil + + check_and_create_media_directory + end + + def add_image(image_object) + image = get_image_information(image_object) + drawing_tag = create_drawing_tag + inline_tag = create_inline_tag + doc_pr_tag = create_doc_pr_tag(image) + graphic_tag = create_graphic_tag(image) + extent_tag = create_extent_tag(image) + c_nv_graphic_frame_pr = create_c_nv_graphic_frame_pr(image) + + inline_tag.add_child(extent_tag) + inline_tag.add_child(doc_pr_tag) + inline_tag.add_child(c_nv_graphic_frame_pr) + inline_tag.add_child(graphic_tag) + drawing_tag.add_child(inline_tag) + + drawing_tag + end + + private + + def get_image_information(image_object) + id = @relation.get_latest_image_id + 1 + path = image_object.attr('src') + name = path.split('/').last + title = image_object.attr('alt') || "Picture-#{id}" + height = image_object.attr('height').to_i + width = image_object.attr('width').to_i + + { name: name, title: title, path: path, height: height, width: width } + end + + def create_drawing_tag + Nokogiri::XML::Node.new('w:drawing', @document) + end + + def create_inline_tag + anchor_tag = Nokogiri::XML::Node.new('wp:inline', @document) + + anchor_tag + end + + def create_doc_pr_tag(image) + doc_pr_tag = Nokogiri::XML::Node.new('wp:docPr', @document) + doc_pr_tag['id'] = @relation.get_uniq_image_id + doc_pr_tag['name'] = image[:name] + doc_pr_tag['title'] = image[:title] + + doc_pr_tag + end + + def check_and_create_media_directory + @media_path = File.join(@tmp_path, 'media') + + Dir.mkdir @media_path unless Dir.exist? @media_path + end + + def create_graphic_tag(image) + graphic_tag = Nokogiri::XML::Node.new('a:graphic', @document) + + graphic_data_tag = create_graphic_data_tag(image) + graphic_tag.add_child(graphic_data_tag) + + graphic_tag + end + + def create_graphic_data_tag(image) + graphic_data_tag = Nokogiri::XML::Node.new('a:graphicData', @document) + graphic_data_tag['uri'] = 'http://schemas.openxmlformats.org/drawingml/2006/picture' + + pic_tag = create_pic_tag(image) + graphic_data_tag.add_child(pic_tag) + + graphic_data_tag + end + + def create_pic_tag(image) + pic_tag = Nokogiri::XML::Node.new('pic:pic', @document) + + nv_pic_pr_tag = create_nv_pic_pr_tag(image) + pic_tag.add_child(nv_pic_pr_tag) + + blip_fill_tag = create_blip_fill_tag(image) + pic_tag.add_child(blip_fill_tag) + + sp_pr_tag = create_sp_pr_tag(image) + pic_tag.add_child(sp_pr_tag) + + pic_tag + end + + def create_nv_pic_pr_tag(image) + nv_pic_pr_tag = Nokogiri::XML::Node.new('pic:nvPicPr', @document) + + c_nv_pr_tag = create_c_nv_pr_tag(image) + nv_pic_pr_tag.add_child(c_nv_pr_tag) + + c_nv_pic_pr = create_c_nv_pic_pr(image) + nv_pic_pr_tag.add_child(c_nv_pic_pr) + + nv_pic_pr_tag + end + + def create_c_nv_pr_tag(image) + c_nv_pr_tag = Nokogiri::XML::Node.new('pic:cNvPr', @document) + c_nv_pr_tag['id'] = @relation.get_uniq_image_id + c_nv_pr_tag['name'] = image[:name] + c_nv_pr_tag['title'] = image[:title] + + c_nv_pr_tag + end + + def create_c_nv_pic_pr(image) + c_nv_pic_pr_tag = Nokogiri::XML::Node.new('pic:cNvPicPr', @document) + + c_nv_pic_pr_tag + end + + def create_blip_fill_tag(image) + blip_fill_tag = Nokogiri::XML::Node.new('pic:blipFill', @document) + + blip_tag = create_blip_tag(image) + blip_fill_tag.add_child(blip_tag) + + stretch_tag = create_stretch_tag + blip_fill_tag.add_child(stretch_tag) + + blip_fill_tag + end + + def create_blip_tag(image) + blip_tag = Nokogiri::XML::Node.new('a:blip', @document) + blip_tag['r:embed'] = @relation.add_image(image, @media_path) + + blip_tag + end + + def create_stretch_tag + stretch_tag = Nokogiri::XML::Node.new('a:stretch', @document) + + fill_rect_tag = create_fill_rect_tag + stretch_tag.add_child(fill_rect_tag) + + stretch_tag + end + + def create_fill_rect_tag + Nokogiri::XML::Node.new('a:fillRect', @document) + end + + def create_sp_pr_tag(image) + sp_pr_tag = Nokogiri::XML::Node.new('pic:spPr', @document) + + xfrm_tag = create_xfrm_tag(image) + sp_pr_tag.add_child(xfrm_tag) + + prst_geom_tag = create_prst_geom_tag(image) + sp_pr_tag.add_child(prst_geom_tag) + + sp_pr_tag + end + + def create_xfrm_tag(image) + xfrm_tag = Nokogiri::XML::Node.new('a:xfrm', @document) + + off_tag = create_off_tag(image) + xfrm_tag.add_child(off_tag) + + ext_tag = create_ext_tag(image) + xfrm_tag.add_child(ext_tag) + + xfrm_tag + end + + def create_off_tag(image) + off_tag = Nokogiri::XML::Node.new('a:off', @document) + off_tag['x'] = '0' + off_tag['y'] = '0' + + off_tag + end + + def create_ext_tag(image) + ext_tag = Nokogiri::XML::Node.new('a:ext', @document) + ext_tag['cx'] = image[:width] * 9525 + ext_tag['cy'] = image[:height] * 9525 + + ext_tag + end + + def create_prst_geom_tag(image) + prst_geom_tag = Nokogiri::XML::Node.new('a:prstGeom', @document) + prst_geom_tag['prst'] = 'rect' + + av_lst_tag = create_av_lst_tag(image) + prst_geom_tag.add_child(av_lst_tag) + + prst_geom_tag + end + + def create_av_lst_tag(image) + Nokogiri::XML::Node.new('a:avLst', @document) + end + + def create_extent_tag(image) + ext_tag = Nokogiri::XML::Node.new('wp:extent', @document) + ext_tag['cx'] = image[:width] * 9525 + ext_tag['cy'] = image[:height] * 9525 + + ext_tag + end + + def create_c_nv_graphic_frame_pr(image) + c_nv_graphic_frame_pr_tag = Nokogiri::XML::Node.new('wp:cNvGraphicFramePr', @document) + + graphic_frame_locks_tag = create_graphic_frame_locks_tag(image) + c_nv_graphic_frame_pr_tag.add_child(graphic_frame_locks_tag) + + c_nv_graphic_frame_pr_tag + end + + def create_graphic_frame_locks_tag(image) + graphic_frame_locks_tag = Nokogiri::XML::Node.new('a:graphicFrameLocks', @document) + graphic_frame_locks_tag['noChangeAspect'] = 1 + + graphic_frame_locks_tag + end + end + end +end \ No newline at end of file diff --git a/lib/Html2Docx/document_objects/paragraph.rb b/lib/Html2Docx/document_objects/paragraph.rb index fbc8756..103d820 100644 --- a/lib/Html2Docx/document_objects/paragraph.rb +++ b/lib/Html2Docx/document_objects/paragraph.rb @@ -1,9 +1,10 @@ module Html2Docx module DocumentObjects class Paragraph - def initialize(document, relation) + def initialize(document, relation, tmp_path) @document = document @relation = relation + @tmp_path = tmp_path @paragraph = nil end @@ -135,6 +136,10 @@ def add_paragraph_child(children) hyperlink_tag.add_child text_field @paragraph.add_child hyperlink_tag next + when 'img' + text_field.add_child add_image(child) + @paragraph.add_child(text_field) + next end paragraph_id = child.attr('id') @@ -220,6 +225,11 @@ def add_stroke_text(text_style) text_style end + def add_image(image) + image_object_helper = DocumentObjects::Image.new(@document, @relation, @tmp_path) + image_object_helper.add_image(image) + end + def render @paragraph end diff --git a/lib/Html2Docx/relation.rb b/lib/Html2Docx/relation.rb index b2d737d..69a210b 100644 --- a/lib/Html2Docx/relation.rb +++ b/lib/Html2Docx/relation.rb @@ -7,6 +7,8 @@ def initialize(options = {}) @last_relation_id = 1 @internal_links = {} @external_links = {} + @images = {} + @unique_image_id = 0 if options[:main_relation] @relation_file = File.join(options.fetch(:temp), 'word', '_rels', 'document2.xml.rels') @@ -59,15 +61,24 @@ def create_internal_link_id(name) end end + def get_latest_internal_link_id + @internal_links.keys.max || 0 + end + + def find_internal_link_id(name) + @internal_links.find{ |key, value| value == name } + end + def create_external_link_id(destination) id, value = find_external_link_id(destination) if id id else - id = get_latest_external_link_id.delete('rId').to_i + 1 - @external_links["rId#{id}"] = destination - "rId#{id}" + id = get_latest_external_link_id.delete('elId').to_i + 1 + @external_links["elId#{id}"] = destination + + "elId#{id}" end end @@ -76,28 +87,85 @@ def find_external_link_id(destination) end def get_latest_external_link_id - @external_links.keys.max || "rId0" + @external_links.keys.max || 'elId0' end - def get_latest_internal_link_id - @internal_links.keys.max || 0 + def get_uniq_image_id + @unique_image_id = @unique_image_id + 1 end - def find_internal_link_id(name) - @internal_links.find{ |key, value| value == name } + def add_image(image, media_path) + real_path = image[:path] + image_name = image[:path].split('/').last + current_path = File.join(media_path, image_name) + + if real_path.start_with? 'http' + request = Typhoeus::Request.new(real_path) + image_file = File.open(current_path, 'wb+') + + request.on_headers do |response| + if response.code != 200 + raise "Image not found! Image Path: #{real_path}" + end + end + + request.on_body do |data| + image_file.write(data) + end + + request.on_complete do |response| + image_file.close + end + + request.run + else + if File.exist? real_path + FileUtils.cp real_path, current_path + else + raise "Image not found! Image Path: #{real_path}" + end + end + + relation_image_path = File.join('/', 'media', image_name) + + add_image_relation(relation_image_path) + end + + def add_image_relation(relation_image_path) + image_id = "iId#{get_uniq_image_id}" + + @images[image_id] = { + type: 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image', + target: relation_image_path + } + + image_id + end + + def get_latest_image_id + @images.keys.max.to_i end def render @external_links.each do |key, value| external_link_relation = Nokogiri::XML::Node.new('Relationship', @relation) - external_link_relation['Id'] = key - external_link_relation['Type'] = 'http://. . ./hyperlink' + external_link_relation['Type'] = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink' external_link_relation['Target'] = value external_link_relation['TargetMode'] = 'External' + external_link_relation['Id'] = key @relation.root.add_child(external_link_relation) end + @images.each do |key, value| + image_relation = Nokogiri::XML::Node.new('Relationship', @relation) + image_relation['Type'] = value[:type] + image_relation['Target'] = value[:target] + image_relation['Id'] = key + + @relation.root.add_child(image_relation) + end + File.open(@relation_file, 'w') { |f| f.write(Helpers::NokogiriHelper.to_xml(@relation)) } end end diff --git a/lib/Html2Docx/version.rb b/lib/Html2Docx/version.rb index dbd7992..314a909 100644 --- a/lib/Html2Docx/version.rb +++ b/lib/Html2Docx/version.rb @@ -1,3 +1,3 @@ module Html2Docx - VERSION = '0.4.1' + VERSION = '0.5.0' end \ No newline at end of file diff --git a/samples/Picture.docx b/samples/Picture.docx new file mode 100644 index 0000000..651871c Binary files /dev/null and b/samples/Picture.docx differ diff --git a/skell/[Content_Types].xml b/skell/[Content_Types].xml index 4d5931e..54b8452 100644 --- a/skell/[Content_Types].xml +++ b/skell/[Content_Types].xml @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/skell/word/document2.xml b/skell/word/document2.xml index ad5106b..50bd863 100644 --- a/skell/word/document2.xml +++ b/skell/word/document2.xml @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file