Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 1 | //===- IdenticalCodeFolding.cpp -------------------------------------------===// |
| 2 | // |
| 3 | // The MCLinker Project |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 9 | #include "mcld/LD/IdenticalCodeFolding.h" |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 10 | |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 11 | #include "mcld/GeneralOptions.h" |
| 12 | #include "mcld/Module.h" |
| 13 | #include "mcld/Fragment/RegionFragment.h" |
| 14 | #include "mcld/LD/LDContext.h" |
| 15 | #include "mcld/LD/LDSection.h" |
| 16 | #include "mcld/LD/RelocData.h" |
| 17 | #include "mcld/LD/Relocator.h" |
| 18 | #include "mcld/LD/ResolveInfo.h" |
| 19 | #include "mcld/LD/SectionData.h" |
| 20 | #include "mcld/LinkerConfig.h" |
| 21 | #include "mcld/MC/Input.h" |
| 22 | #include "mcld/Support/Demangle.h" |
| 23 | #include "mcld/Support/MsgHandling.h" |
| 24 | #include "mcld/Target/GNULDBackend.h" |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 25 | |
| 26 | #include <llvm/ADT/StringRef.h> |
| 27 | #include <llvm/Support/Casting.h> |
| 28 | #include <llvm/Support/Format.h> |
| 29 | |
| 30 | #include <cassert> |
| 31 | #include <map> |
| 32 | #include <set> |
| 33 | |
| 34 | #include <zlib.h> |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 35 | |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 36 | namespace mcld { |
| 37 | |
| 38 | static bool isSymCtorOrDtor(const ResolveInfo& pSym) { |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 39 | // We can always fold ctors and dtors since accessing function pointer in C++ |
| 40 | // is forbidden. |
| 41 | llvm::StringRef name(pSym.name(), pSym.nameSize()); |
| 42 | if (!name.startswith("_ZZ") && !name.startswith("_ZN")) { |
| 43 | return false; |
| 44 | } |
| 45 | return isCtorOrDtor(pSym.name(), pSym.nameSize()); |
| 46 | } |
| 47 | |
| 48 | IdenticalCodeFolding::IdenticalCodeFolding(const LinkerConfig& pConfig, |
| 49 | const TargetLDBackend& pBackend, |
| 50 | Module& pModule) |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 51 | : m_Config(pConfig), m_Backend(pBackend), m_Module(pModule) { |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 52 | } |
| 53 | |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 54 | void IdenticalCodeFolding::foldIdenticalCode() { |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 55 | // 1. Find folding candidates. |
| 56 | FoldingCandidates candidate_list; |
| 57 | findCandidates(candidate_list); |
| 58 | |
| 59 | // 2. Initialize constant section content |
| 60 | for (size_t i = 0; i < candidate_list.size(); ++i) { |
| 61 | candidate_list[i].initConstantContent(m_Backend, m_KeptSections); |
| 62 | } |
| 63 | |
| 64 | // 3. Find identical code until convergence |
| 65 | bool converged = false; |
| 66 | size_t iterations = 0; |
| 67 | while (!converged && (iterations < m_Config.options().getICFIterations())) { |
| 68 | converged = matchCandidates(candidate_list); |
| 69 | ++iterations; |
| 70 | } |
| 71 | if (m_Config.options().printICFSections()) { |
| 72 | debug(diag::debug_icf_iterations) << iterations; |
| 73 | } |
| 74 | |
| 75 | // 4. Fold the identical code |
| 76 | typedef std::set<Input*> FoldedObjects; |
| 77 | FoldedObjects folded_objs; |
| 78 | KeptSections::iterator kept, keptEnd = m_KeptSections.end(); |
| 79 | size_t index = 0; |
| 80 | for (kept = m_KeptSections.begin(); kept != keptEnd; ++kept, ++index) { |
| 81 | LDSection* sect = (*kept).first; |
| 82 | Input* obj = (*kept).second.first; |
| 83 | size_t kept_index = (*kept).second.second; |
| 84 | if (index != kept_index) { |
| 85 | sect->setKind(LDFileFormat::Folded); |
| 86 | folded_objs.insert(obj); |
| 87 | |
| 88 | if (m_Config.options().printICFSections()) { |
| 89 | KeptSections::iterator it = m_KeptSections.begin() + kept_index; |
| 90 | LDSection* kept_sect = (*it).first; |
| 91 | Input* kept_obj = (*it).second.first; |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 92 | debug(diag::debug_icf_folded_section) << sect->name() << obj->name() |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 93 | << kept_sect->name() |
| 94 | << kept_obj->name(); |
| 95 | } |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | // Adjust the fragment reference of the folded symbols. |
| 100 | FoldedObjects::iterator fobj, fobjEnd = folded_objs.end(); |
| 101 | for (fobj = folded_objs.begin(); fobj != fobjEnd; ++fobj) { |
| 102 | LDContext::sym_iterator sym, symEnd = (*fobj)->context()->symTabEnd(); |
| 103 | for (sym = (*fobj)->context()->symTabBegin(); sym != symEnd; ++sym) { |
| 104 | if ((*sym)->hasFragRef() && ((*sym)->type() == ResolveInfo::Function)) { |
| 105 | LDSymbol* out_sym = (*sym)->resolveInfo()->outSymbol(); |
| 106 | FragmentRef* frag_ref = out_sym->fragRef(); |
| 107 | LDSection* sect = &(frag_ref->frag()->getParent()->getSection()); |
| 108 | if (sect->kind() == LDFileFormat::Folded) { |
| 109 | size_t kept_index = m_KeptSections[sect].second; |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 110 | LDSection* kept_sect = (*(m_KeptSections.begin() + kept_index)).first; |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 111 | frag_ref->assign(kept_sect->getSectionData()->front(), |
| 112 | frag_ref->offset()); |
| 113 | } |
| 114 | } |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 115 | } // for each symbol |
| 116 | } // for each folded object |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 117 | } |
| 118 | |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 119 | void IdenticalCodeFolding::findCandidates(FoldingCandidates& pCandidateList) { |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 120 | Module::obj_iterator obj, objEnd = m_Module.obj_end(); |
| 121 | for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) { |
| 122 | std::set<const LDSection*> funcptr_access_set; |
| 123 | typedef std::map<LDSection*, LDSection*> CandidateMap; |
| 124 | CandidateMap candidate_map; |
| 125 | LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd(); |
| 126 | for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) { |
| 127 | switch ((*sect)->kind()) { |
| 128 | case LDFileFormat::TEXT: { |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 129 | candidate_map.insert( |
| 130 | std::make_pair(*sect, reinterpret_cast<LDSection*>(NULL))); |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 131 | break; |
| 132 | } |
| 133 | case LDFileFormat::Relocation: { |
| 134 | LDSection* target = (*sect)->getLink(); |
| 135 | if (target->kind() == LDFileFormat::TEXT) { |
| 136 | candidate_map[target] = *sect; |
| 137 | } |
| 138 | |
| 139 | // Safe icf |
| 140 | if (m_Config.options().getICFMode() == GeneralOptions::ICF_Safe) { |
| 141 | RelocData::iterator rel, relEnd = (*sect)->getRelocData()->end(); |
| 142 | for (rel = (*sect)->getRelocData()->begin(); rel != relEnd; ++rel) { |
| 143 | LDSymbol* sym = rel->symInfo()->outSymbol(); |
| 144 | if (sym->hasFragRef() && (sym->type() == ResolveInfo::Function)) { |
| 145 | const LDSection* def = |
| 146 | &sym->fragRef()->frag()->getParent()->getSection(); |
| 147 | if (!isSymCtorOrDtor(*rel->symInfo()) && |
| 148 | m_Backend.mayHaveUnsafeFunctionPointerAccess(*target) && |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 149 | m_Backend.getRelocator() |
| 150 | ->mayHaveFunctionPointerAccess(*rel)) { |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 151 | funcptr_access_set.insert(def); |
| 152 | } |
| 153 | } |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 154 | } // for each reloc |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 155 | } |
| 156 | |
| 157 | break; |
| 158 | } |
| 159 | default: { |
| 160 | // skip |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 161 | break; |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 162 | } |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 163 | } // end of switch |
| 164 | } // for each section |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 165 | |
| 166 | CandidateMap::iterator candidate, candidateEnd = candidate_map.end(); |
| 167 | for (candidate = candidate_map.begin(); candidate != candidateEnd; |
| 168 | ++candidate) { |
| 169 | if ((m_Config.options().getICFMode() == GeneralOptions::ICF_All) || |
| 170 | (funcptr_access_set.count(candidate->first) == 0)) { |
| 171 | size_t index = m_KeptSections.size(); |
| 172 | m_KeptSections[candidate->first] = ObjectAndId(*obj, index); |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 173 | pCandidateList.push_back( |
| 174 | FoldingCandidate(candidate->first, candidate->second, *obj)); |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 175 | } |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 176 | } // for each possible candidate |
| 177 | } // for each obj |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 178 | } |
| 179 | |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 180 | bool IdenticalCodeFolding::matchCandidates(FoldingCandidates& pCandidateList) { |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 181 | typedef std::multimap<uint32_t, size_t> ChecksumMap; |
| 182 | ChecksumMap checksum_map; |
| 183 | std::vector<std::string> contents(pCandidateList.size()); |
| 184 | bool converged = true; |
| 185 | |
| 186 | for (size_t index = 0; index < pCandidateList.size(); ++index) { |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 187 | contents[index] = pCandidateList[index].getContentWithVariables( |
| 188 | m_Backend, m_KeptSections); |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 189 | uint32_t checksum = ::crc32(0xFFFFFFFF, |
| 190 | (const uint8_t*)contents[index].c_str(), |
| 191 | contents[index].length()); |
| 192 | |
| 193 | size_t count = checksum_map.count(checksum); |
| 194 | if (count == 0) { |
| 195 | checksum_map.insert(std::make_pair(checksum, index)); |
| 196 | } else { |
| 197 | std::pair<ChecksumMap::iterator, ChecksumMap::iterator> ret = |
| 198 | checksum_map.equal_range(checksum); |
| 199 | for (ChecksumMap::iterator it = ret.first; it != ret.second; ++it) { |
| 200 | size_t kept_index = (*it).second; |
| 201 | if (contents[index].compare(contents[kept_index]) == 0) { |
| 202 | m_KeptSections[pCandidateList[index].sect].second = kept_index; |
| 203 | converged = false; |
| 204 | break; |
| 205 | } |
| 206 | } |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | return converged; |
| 211 | } |
| 212 | |
| 213 | void IdenticalCodeFolding::FoldingCandidate::initConstantContent( |
| 214 | const TargetLDBackend& pBackend, |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 215 | const IdenticalCodeFolding::KeptSections& pKeptSections) { |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 216 | // Get the static content from text. |
| 217 | assert(sect != NULL && sect->hasSectionData()); |
| 218 | SectionData::const_iterator frag, fragEnd = sect->getSectionData()->end(); |
| 219 | for (frag = sect->getSectionData()->begin(); frag != fragEnd; ++frag) { |
| 220 | switch (frag->getKind()) { |
| 221 | case Fragment::Region: { |
| 222 | const RegionFragment& region = llvm::cast<RegionFragment>(*frag); |
| 223 | content.append(region.getRegion().begin(), region.size()); |
| 224 | break; |
| 225 | } |
| 226 | default: { |
| 227 | // FIXME: Currently we only take care of RegionFragment. |
| 228 | break; |
| 229 | } |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | // Get the static content from relocs. |
| 234 | if (reloc_sect != NULL && reloc_sect->hasRelocData()) { |
| 235 | RelocData::iterator rel, relEnd = reloc_sect->getRelocData()->end(); |
| 236 | for (rel = reloc_sect->getRelocData()->begin(); rel != relEnd; ++rel) { |
Stephen Hines | a6c24df | 2015-03-18 14:53:18 -0700 | [diff] [blame^] | 237 | llvm::format_object<Relocation::Type, |
| 238 | Relocation::Address, |
| 239 | Relocation::Address, |
| 240 | Relocation::Address> rel_info("%x%llx%llx%llx", |
| 241 | rel->type(), |
| 242 | rel->symValue(), |
| 243 | rel->addend(), |
| 244 | rel->place()); |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 245 | char rel_str[48]; |
| 246 | rel_info.print(rel_str, sizeof(rel_str)); |
| 247 | content.append(rel_str); |
| 248 | |
| 249 | // Handle the recursive call. |
| 250 | LDSymbol* sym = rel->symInfo()->outSymbol(); |
| 251 | if ((sym->type() == ResolveInfo::Function) && sym->hasFragRef()) { |
| 252 | LDSection* def = &sym->fragRef()->frag()->getParent()->getSection(); |
| 253 | if (def == sect) { |
| 254 | continue; |
| 255 | } |
| 256 | } |
| 257 | |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 258 | if (!pBackend.isSymbolPreemptible(*rel->symInfo()) && sym->hasFragRef() && |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 259 | (pKeptSections.find( |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 260 | &sym->fragRef()->frag()->getParent()->getSection()) != |
| 261 | pKeptSections.end())) { |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 262 | // Mark this reloc as a variable. |
| 263 | variable_relocs.push_back(rel); |
| 264 | } else { |
| 265 | // TODO: Support inlining merge sections if possible (target-dependent). |
| 266 | if ((sym->binding() == ResolveInfo::Local) || |
| 267 | (sym->binding() == ResolveInfo::Absolute)) { |
| 268 | // ABS or Local symbols. |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 269 | content.append(sym->name()).append(obj->name()).append( |
| 270 | obj->path().native()); |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 271 | } else { |
| 272 | content.append(sym->name()); |
| 273 | } |
| 274 | } |
| 275 | } |
| 276 | } |
| 277 | } |
| 278 | |
| 279 | std::string IdenticalCodeFolding::FoldingCandidate::getContentWithVariables( |
| 280 | const TargetLDBackend& pBackend, |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 281 | const IdenticalCodeFolding::KeptSections& pKeptSections) { |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 282 | std::string result(content); |
| 283 | // Compute the variable content from relocs. |
| 284 | std::vector<Relocation*>::const_iterator rel, relEnd = variable_relocs.end(); |
| 285 | for (rel = variable_relocs.begin(); rel != relEnd; ++rel) { |
| 286 | LDSymbol* sym = (*rel)->symInfo()->outSymbol(); |
| 287 | LDSection* def = &sym->fragRef()->frag()->getParent()->getSection(); |
| 288 | // Use the kept section index. |
| 289 | KeptSections::const_iterator it = pKeptSections.find(def); |
Stephen Hines | a6c24df | 2015-03-18 14:53:18 -0700 | [diff] [blame^] | 290 | llvm::format_object<size_t> kept_info("%x", (*it).second.second); |
Pete Chou | b332308 | 2014-08-06 21:29:36 +0800 | [diff] [blame] | 291 | char kept_str[8]; |
| 292 | kept_info.print(kept_str, sizeof(kept_str)); |
| 293 | result.append(kept_str); |
| 294 | } |
| 295 | |
| 296 | return result; |
| 297 | } |
Stephen Hines | 37b74a3 | 2014-11-26 18:48:20 -0800 | [diff] [blame] | 298 | |
| 299 | } // namespace mcld |