#include #include #include #include #include #include #include "pugixml.hpp" namespace fs = std::filesystem; void processBundle(const pugi::xml_node &bundle, const fs::path &htmlDir, bool dryRun, const std::string &holderId, int &processedBundles, int &skippedBundles) { std::cout << "Entering processBundle\n"; // Get holder identifier (optional) std::string holder = "none"; pugi::xml_node holderNode = bundle.child("Holder"); if (holderNode) { pugi::xml_node holderIdentifierNode = holderNode.child("Identifier"); if (holderIdentifierNode) { holder = holderIdentifierNode.text().as_string(); if (holder.empty()) { std::cout << "Warning: Empty Holder/Identifier in bundle\n"; holder = "empty"; } } else { std::cout << "Warning: No Identifier found in Holder\n"; } } else { std::cout << "Warning: No Holder node found in bundle\n"; } // Filter by holder ID if provided if (!holderId.empty() && holder != holderId && holder != "none" && holder != "empty") { std::cout << "Skipping bundle (Holder/Identifier: " << holder << " does not match requested " << holderId << ")\n"; ++skippedBundles; return; } // Set output directory based on holder ID fs::path outputDir = htmlDir / (holder == "none" || holder == "empty" ? "unknown_holder" : holder); if (!dryRun && !fs::exists(outputDir)) { try { fs::create_directories(outputDir); std::cout << "Created output directory: " << outputDir.string() << "\n"; } catch (const std::filesystem::filesystem_error &e) { std::cout << "Error: Failed to create output directory " << outputDir.string() << ": " << e.what() << "\n"; return; } } // Get identifier from RegulatedAuthorization pugi::xml_node regAuth = bundle.child("RegulatedAuthorization"); std::string identifier = "unknown"; if (regAuth) { pugi::xml_node identifierNode = regAuth.child("Identifier"); if (identifierNode) { identifier = identifierNode.text().as_string(); if (identifier.empty()) { std::cout << "Warning: Empty Identifier in RegulatedAuthorization\n"; identifier = "unknown"; } } else { std::cout << "Warning: No Identifier found in RegulatedAuthorization\n"; } } else { std::cout << "Warning: No RegulatedAuthorization found in bundle\n"; } // Get type pugi::xml_node typeNode = bundle.child("Type"); std::string type = typeNode ? typeNode.text().as_string() : "unknown"; std::cout << "Processing bundle: Identifier=" << identifier << ", Holder=" << holder << ", Type=" << type << ", Output Dir=" << outputDir.string() << "\n"; ++processedBundles; // Process AttachedDocument nodes bool foundHtml = false; for (auto attached : bundle.children("AttachedDocument")) { pugi::xml_node langNode = attached.child("Language"); std::string lang = langNode ? langNode.text().as_string() : "unknown"; std::cout << " Processing AttachedDocument: Language=" << lang << "\n"; for (auto docRef : attached.children("DocumentReference")) { pugi::xml_node contentTypeNode = docRef.child("ContentType"); pugi::xml_node urlNode = docRef.child("Url"); if (!contentTypeNode || !urlNode) { std::cout << " Warning: Missing ContentType or Url in DocumentReference (Identifier: " << identifier << ")\n"; continue; } std::string contentType = contentTypeNode.text().as_string(); std::string url = urlNode.text().as_string(); std::cout << " Found DocumentReference: ContentType=" << contentType << ", Url=" << url << "\n"; if (contentType != "text/html") { std::cout << " Skipping non-HTML file: " << url << " (ContentType: " << contentType << ")\n"; continue; } foundHtml = true; // Extract basename from URL std::string basename; std::string::size_type pos = url.find_last_of("/"); if (pos != std::string::npos && pos < url.length() - 1) { basename = url.substr(pos + 1); } else { std::cout << " Warning: Invalid URL format: " << url << " (Identifier: " << identifier << ")\n"; continue; } if (basename.empty()) { std::cout << " Warning: Empty basename from URL: " << url << " (Identifier: " << identifier << ")\n"; continue; } // Construct file paths fs::path oldFile = htmlDir / basename; std::string newName = identifier + "_" + type + "_" + lang + ".html"; fs::path newFile = outputDir / newName; std::cout << " PLAN: Copy " << oldFile.string() << " -> " << newFile.string() << "\n"; // Check if source file exists if (!fs::exists(oldFile)) { std::cout << " Warning: Source file not found: " << oldFile.string() << "\n"; continue; } if (!dryRun) { try { if (fs::exists(newFile)) { std::cout << " Warning: Target file already exists: " << newFile.string() << "\n"; continue; } fs::copy(oldFile, newFile, fs::copy_options::overwrite_existing); std::cout << " SUCCESS: Copied " << oldFile.string() << " -> " << newFile.string() << "\n"; } catch (const std::filesystem::filesystem_error &e) { std::cout << " Error: Filesystem error copying " << oldFile.string() << ": " << e.what() << "\n"; } catch (const std::exception &e) { std::cout << " Error: Exception copying " << oldFile.string() << ": " << e.what() << "\n"; } } } } if (!foundHtml) { std::cout << "Warning: No HTML files found in bundle (Identifier: " << identifier << ", Holder: " << holder << ")\n"; } } int main(int argc, char* argv[]) { std::string xmlFile, htmlDir, holderId; bool dryRun = false; // Parse command-line arguments for (int i = 1; i < argc; ++i) { std::string arg = argv[i]; if (arg == "--dry-run") { dryRun = true; } else if (arg == "--holder-id" && i + 1 < argc) { holderId = argv[++i]; } else if (xmlFile.empty()) { xmlFile = arg; } else if (htmlDir.empty()) { htmlDir = arg; } } if (xmlFile.empty() || htmlDir.empty()) { std::cerr << "Usage: " << argv[0] << " [--dry-run] [--holder-id ]" << std::endl; return 1; } std::cout << "Program started: XML=" << xmlFile << ", HTML Dir=" << htmlDir << ", Holder ID=" << (holderId.empty() ? "none" : holderId) << ", Dry Run=" << (dryRun ? "true" : "false") << "\n"; // Validate XML file exists if (!fs::exists(xmlFile)) { std::cerr << "Error: XML file does not exist: " << xmlFile << "\n"; return 1; } // Validate HTML directory exists if (!fs::exists(htmlDir) || !fs::is_directory(htmlDir)) { std::cerr << "Error: Input directory does not exist or is not a directory: " << htmlDir << "\n"; return 1; } // Load XML pugi::xml_document doc; pugi::xml_parse_result result = doc.load_file(xmlFile.c_str()); if (!result) { std::cerr << "Error loading XML: " << result.description() << "\n"; return 1; } auto firstChild = doc.first_child(); if (!firstChild) { std::cerr << "Error: XML document has no root element\n"; return 1; } std::cout << "Loaded XML root: " << firstChild.name() << "\n"; // Find all MedicinalDocumentsBundle elements int processedBundles = 0, skippedBundles = 0; try { pugi::xpath_node_set bundles = doc.select_nodes("//MedicinalDocumentsBundle"); std::cout << "Found " << bundles.size() << " MedicinalDocumentsBundle elements\n"; if (bundles.empty()) { std::cout << "No MedicinalDocumentsBundle elements found in XML\n"; return 0; } for (auto node : bundles) { processBundle(node.node(), htmlDir, dryRun, holderId, processedBundles, skippedBundles); } } catch (const pugi::xpath_exception &e) { std::cerr << "XPath error: " << e.what() << "\n"; return 1; } catch (const std::exception &e) { std::cerr << "Unexpected error: " << e.what() << "\n"; return 1; } std::cout << "Program completed: Processed " << processedBundles << " bundles, Skipped " << skippedBundles << " bundles\n"; if (!holderId.empty() && processedBundles == 0) { std::cerr << "Warning: No bundles matched Holder/Identifier=" << holderId << "\n"; } return 0; }