// Copyright 2025 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may
// obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/profiling/internal/profile_builder.h"

#ifdef __linux__
#include <elf.h>
#include <link.h>
#endif  // __linux__

#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
#include <vector>

#include "absl/base/casts.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/types/span.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {

namespace {

// This file contains a simplified implementation of the pprof profile builder,
// which avoids a dependency on protobuf.
//
// The canonical profile proto definition is at
// https://github.com/google/pprof/blob/master/proto/profile.proto
//
// Wire-format encoding is a simple sequence of (tag, value) pairs. The tag
// is a varint-encoded integer, where the low 3 bits are the wire type, and the
// high bits are the field number.
//
// For the fields we care about, we'll be using the following wire types:
//
// Wire Type 0: Varint-encoded integer.
// Wire Type 2: Length-delimited. Used for strings and sub-messages.
enum class WireType {
  kVarint = 0,
  kLengthDelimited = 2,
};

#ifdef __linux__
// Returns the Phdr of the first segment of the given type.
const ElfW(Phdr) * GetFirstSegment(const dl_phdr_info* const info,
                                   const ElfW(Word) segment_type) {
  for (int i = 0; i < info->dlpi_phnum; ++i) {
    if (info->dlpi_phdr[i].p_type == segment_type) {
      return &info->dlpi_phdr[i];
    }
  }
  return nullptr;
}

// Return DT_SONAME for the given image.  If there is no PT_DYNAMIC or if
// PT_DYNAMIC does not contain DT_SONAME, return nullptr.
static const char* GetSoName(const dl_phdr_info* const info) {
  const ElfW(Phdr)* const pt_dynamic = GetFirstSegment(info, PT_DYNAMIC);
  if (pt_dynamic == nullptr) {
    return nullptr;
  }
  const ElfW(Dyn)* dyn =
      reinterpret_cast<ElfW(Dyn)*>(info->dlpi_addr + pt_dynamic->p_vaddr);
  const ElfW(Dyn)* dt_strtab = nullptr;
  const ElfW(Dyn)* dt_strsz = nullptr;
  const ElfW(Dyn)* dt_soname = nullptr;
  for (; dyn->d_tag != DT_NULL; ++dyn) {
    if (dyn->d_tag == DT_SONAME) {
      dt_soname = dyn;
    } else if (dyn->d_tag == DT_STRTAB) {
      dt_strtab = dyn;
    } else if (dyn->d_tag == DT_STRSZ) {
      dt_strsz = dyn;
    }
  }
  if (dt_soname == nullptr) {
    return nullptr;
  }
  ABSL_RAW_CHECK(dt_strtab != nullptr, "Unexpected nullptr");
  ABSL_RAW_CHECK(dt_strsz != nullptr, "Unexpected nullptr");
  const char* const strtab = reinterpret_cast<char*>(
      info->dlpi_addr + static_cast<ElfW(Word)>(dt_strtab->d_un.d_val));
  ABSL_RAW_CHECK(dt_soname->d_un.d_val < dt_strsz->d_un.d_val,
                 "Unexpected order");
  return strtab + dt_soname->d_un.d_val;
}

// Helper function to get the build ID of a shared object.
std::string GetBuildId(const dl_phdr_info* const info) {
  std::string result;

  // pt_note contains entries (of type ElfW(Nhdr)) starting at
  //   info->dlpi_addr + pt_note->p_vaddr
  // with length
  //   pt_note->p_memsz
  //
  // The length of each entry is given by
  //   Align(sizeof(ElfW(Nhdr)) + nhdr->n_namesz) + Align(nhdr->n_descsz)
  for (int i = 0; i < info->dlpi_phnum; ++i) {
    const ElfW(Phdr)* pt_note = &info->dlpi_phdr[i];
    if (pt_note->p_type != PT_NOTE) continue;

    const char* note =
        reinterpret_cast<char*>(info->dlpi_addr + pt_note->p_vaddr);
    const char* const last = note + pt_note->p_filesz;
    const ElfW(Xword) align = pt_note->p_align;
    while (note < last) {
      const ElfW(Nhdr)* const nhdr = reinterpret_cast<const ElfW(Nhdr)*>(note);
      if (note + sizeof(*nhdr) > last) {
        // Corrupt PT_NOTE
        break;
      }

      // Both the start and end of the descriptor are aligned by sh_addralign
      // (= p_align).
      const ElfW(Xword) desc_start =
          (sizeof(*nhdr) + nhdr->n_namesz + align - 1) & -align;
      const ElfW(Xword) size =
          desc_start + ((nhdr->n_descsz + align - 1) & -align);

      // Beware of wrap-around.
      if (nhdr->n_namesz >= static_cast<ElfW(Word)>(-align) ||
          nhdr->n_descsz >= static_cast<ElfW(Word)>(-align) ||
          desc_start < sizeof(*nhdr) || size < desc_start ||
          size > static_cast<ElfW(Xword)>(last - note)) {
        // Corrupt PT_NOTE
        break;
      }

      if (nhdr->n_type == NT_GNU_BUILD_ID) {
        const char* const note_name = note + sizeof(*nhdr);
        // n_namesz is the length of note_name.
        if (nhdr->n_namesz == 4 && memcmp(note_name, "GNU\0", 4) == 0) {
          if (!result.empty()) {
            // Repeated build-ids.  Ignore them.
            return "";
          }
          result = absl::BytesToHexString(
              absl::string_view(note + desc_start, nhdr->n_descsz));
        }
      }
      note += size;
    }
  }

  return result;
}
#endif  // __linux__

// A varint-encoded integer.
struct Varint {
  explicit Varint(uint64_t v) : value(v) {}
  explicit Varint(StringId v) : value(static_cast<uint64_t>(v)) {}
  explicit Varint(LocationId v) : value(static_cast<uint64_t>(v)) {}
  explicit Varint(MappingId v) : value(static_cast<uint64_t>(v)) {}

  uint64_t value;

  template <typename Sink>
  friend void AbslStringify(Sink& sink, const Varint& v) {
    char buf[10];
    char* p = buf;
    uint64_t u = v.value;
    while (u >= 0x80) {
      *p++ = static_cast<char>((u & 0x7f) | 0x80);
      u >>= 7;
    }
    *p++ = static_cast<char>(u);
    sink.Append(absl::string_view(buf, static_cast<size_t>(p - buf)));
  }
};

struct Tag {
  int field_number;
  WireType wire_type;

  template <typename Sink>
  friend void AbslStringify(Sink& sink, const Tag& t) {
    absl::Format(&sink, "%v",
                 Varint((static_cast<uint64_t>(t.field_number) << 3) |
                        static_cast<uint64_t>(t.wire_type)));
  }
};

struct LengthDelimited {
  int field_number;
  absl::string_view value;

  template <typename Sink>
  friend void AbslStringify(Sink& sink, const LengthDelimited& ld) {
    absl::Format(&sink, "%v%v%v",
                 Tag{ld.field_number, WireType::kLengthDelimited},
                 Varint(ld.value.size()), ld.value);
  }
};

struct VarintField {
  int field_number;
  Varint value;

  template <typename Sink>
  friend void AbslStringify(Sink& sink, const VarintField& vf) {
    absl::Format(&sink, "%v%v", Tag{vf.field_number, WireType::kVarint},
                 vf.value);
  }
};

}  // namespace

StringId ProfileBuilder::InternString(absl::string_view str) {
  if (str.empty()) return StringId(0);
  return string_table_.emplace(str, StringId(string_table_.size()))
      .first->second;
}

LocationId ProfileBuilder::InternLocation(const void* address) {
  return location_table_
      .emplace(absl::bit_cast<uintptr_t>(address),
               LocationId(location_table_.size() + 1))
      .first->second;
}

void ProfileBuilder::AddSample(
    int64_t value, absl::Span<const void* const> stack,
    absl::Span<const std::pair<StringId, int64_t>> labels) {
  std::string sample_proto;
  absl::StrAppend(
      &sample_proto,
      VarintField{SampleProto::kValue, Varint(static_cast<uint64_t>(value))});

  for (const void* addr : stack) {
    // Profile addresses are raw stack unwind addresses, so they should be
    // adjusted by -1 to land inside the call instruction (although potentially
    // misaligned).
    absl::StrAppend(
        &sample_proto,
        VarintField{SampleProto::kLocationId,
                    Varint(InternLocation(absl::bit_cast<const void*>(
                        absl::bit_cast<uintptr_t>(addr) - 1)))});
  }

  for (const auto& label : labels) {
    std::string label_proto =
        absl::StrCat(VarintField{LabelProto::kKey, Varint(label.first)},
                     VarintField{LabelProto::kNum,
                                 Varint(static_cast<uint64_t>(label.second))});
    absl::StrAppend(&sample_proto,
                    LengthDelimited{SampleProto::kLabel, label_proto});
  }
  samples_.push_back(std::move(sample_proto));
}

void ProfileBuilder::AddSampleType(StringId type, StringId unit) {
  std::string sample_type_proto =
      absl::StrCat(VarintField{ValueTypeProto::kType, Varint(type)},
                   VarintField{ValueTypeProto::kUnit, Varint(unit)});
  sample_types_.push_back(std::move(sample_type_proto));
}

MappingId ProfileBuilder::AddMapping(uintptr_t memory_start,
                                     uintptr_t memory_limit,
                                     uintptr_t file_offset,
                                     absl::string_view filename,
                                     absl::string_view build_id) {
  size_t index = mappings_.size() + 1;
  auto [it, inserted] = mapping_table_.emplace(memory_start, index);
  if (!inserted) {
    return static_cast<MappingId>(it->second);
  }

  Mapping m;
  m.start = memory_start;
  m.limit = memory_limit;
  m.offset = file_offset;
  m.filename = std::string(filename);
  m.build_id = std::string(build_id);

  mappings_.push_back(std::move(m));
  return static_cast<MappingId>(index);
}

std::string ProfileBuilder::Emit() && {
  std::string profile_proto;
  for (const auto& sample_type : sample_types_) {
    absl::StrAppend(&profile_proto,
                    LengthDelimited{ProfileProto::kSampleType, sample_type});
  }
  for (const auto& sample : samples_) {
    absl::StrAppend(&profile_proto,
                    LengthDelimited{ProfileProto::kSample, sample});
  }

  // Build mapping table.
  for (size_t i = 0, n = mappings_.size(); i < n; ++i) {
    const auto& mapping = mappings_[i];
    std::string mapping_proto = absl::StrCat(
        VarintField{MappingProto::kId, Varint(static_cast<uint64_t>(i + 1))},
        VarintField{MappingProto::kMemoryStart, Varint(mapping.start)},
        VarintField{MappingProto::kMemoryLimit, Varint(mapping.limit)},
        VarintField{MappingProto::kFileOffset, Varint(mapping.offset)},
        VarintField{MappingProto::kFilename,
                    Varint(InternString(mapping.filename))},
        VarintField{MappingProto::kBuildId,
                    Varint(InternString(mapping.build_id))});

    absl::StrAppend(&profile_proto,
                    LengthDelimited{ProfileProto::kMapping, mapping_proto});
  }

  // Build location table.
  for (const auto& [address, id] : location_table_) {
    std::string location =
        absl::StrCat(VarintField{LocationProto::kId, Varint(id)},
                     VarintField{LocationProto::kAddress, Varint(address)});

    if (!mappings_.empty()) {
      // Find the mapping ID.
      auto it = mapping_table_.upper_bound(address);
      if (it != mapping_table_.begin()) {
        --it;
      }

      // If *it contains address, add mapping to location.
      const size_t mapping_index = it->second;
      const Mapping& mapping = mappings_[mapping_index - 1];

      if (it->first <= address && address < mapping.limit) {
        absl::StrAppend(
            &location,
            VarintField{LocationProto::kMappingId,
                        Varint(static_cast<uint64_t>(mapping_index))});
      }
    }

    absl::StrAppend(&profile_proto,
                    LengthDelimited{ProfileProto::kLocation, location});
  }

  std::string string_table_proto;
  std::vector<absl::string_view> sorted_strings(string_table_.size());
  for (const auto& p : string_table_) {
    sorted_strings[static_cast<size_t>(p.second)] = p.first;
  }
  for (const auto& s : sorted_strings) {
    absl::StrAppend(&string_table_proto,
                    LengthDelimited{ProfileProto::kStringTable, s});
  }
  absl::StrAppend(&profile_proto, VarintField{ProfileProto::kDropFrames,
                                              Varint(drop_frames_id_)});
  absl::StrAppend(&profile_proto,
                  VarintField{ProfileProto::kComment, Varint(comment_id_)});
  absl::StrAppend(&profile_proto, VarintField{ProfileProto::kDefaultSampleType,
                                              Varint(default_sample_type_id_)});
  return absl::StrCat(string_table_proto, profile_proto);
}

void ProfileBuilder::set_drop_frames_id(StringId drop_frames_id) {
  drop_frames_id_ = drop_frames_id;
}

void ProfileBuilder::set_comment_id(StringId comment_id) {
  comment_id_ = comment_id;
}

void ProfileBuilder::set_default_sample_type_id(
    StringId default_sample_type_id) {
  default_sample_type_id_ = default_sample_type_id;
}

void ProfileBuilder::AddCurrentMappings() {
#ifdef __linux__
  dl_iterate_phdr(
      +[](dl_phdr_info* info, size_t, void* data) {
        auto& builder = *reinterpret_cast<ProfileBuilder*>(data);

        // Skip dummy entry introduced since glibc 2.18.
        if (info->dlpi_phdr == nullptr && info->dlpi_phnum == 0) {
          return 0;
        }

        const bool is_main_executable = builder.mappings_.empty();

        // Evaluate all the loadable segments.
        for (int i = 0; i < info->dlpi_phnum; ++i) {
          if (info->dlpi_phdr[i].p_type != PT_LOAD) {
            continue;
          }
          const ElfW(Phdr)* pt_load = &info->dlpi_phdr[i];

          ABSL_RAW_CHECK(pt_load != nullptr, "Unexpected nullptr");

          // Extract data.
          const size_t memory_start = info->dlpi_addr + pt_load->p_vaddr;
          const size_t memory_limit = memory_start + pt_load->p_memsz;
          const size_t file_offset = pt_load->p_offset;

          // Storage for path to executable as dlpi_name isn't populated for the
          // main executable.  +1 to allow for the null terminator that readlink
          // does not add.
          char self_filename[PATH_MAX + 1];
          const char* filename = info->dlpi_name;
          if (filename == nullptr || filename[0] == '\0') {
            // This is either the main executable or the VDSO.  The main
            // executable is always the first entry processed by callbacks.
            if (is_main_executable) {
              // This is the main executable.
              ssize_t ret = readlink("/proc/self/exe", self_filename,
                                     sizeof(self_filename) - 1);
              if (ret >= 0 &&
                  static_cast<size_t>(ret) < sizeof(self_filename)) {
                self_filename[ret] = '\0';
                filename = self_filename;
              }
            } else {
              // This is the VDSO.
              filename = GetSoName(info);
            }
          }

          char resolved_path[PATH_MAX];
          absl::string_view resolved_filename;
          if (realpath(filename, resolved_path)) {
            resolved_filename = resolved_path;
          } else {
            resolved_filename = filename;
          }

          const std::string build_id = GetBuildId(info);

          // Add to profile.
          builder.AddMapping(memory_start, memory_limit, file_offset,
                             resolved_filename, build_id);
        }
        // Keep going.
        return 0;
      },
      this);
#endif  // __linux__
}

}  // namespace debugging_internal
ABSL_NAMESPACE_END
}  // namespace absl
