I'm trying to compare the hash of a local file with the hash returned by the GitHub API.
I have created test.txt and uploaded it to my repo. When I call compute_sha1_hash on test.txt, I get:
2aae6c35c94fcfb415dbe95f408b9ce91ee846ed
The GitHub
API response:
"path": "test.txt",
"mode": "100644",
"type": "blob",
"sha": "95d09f2b10159347eece71399a7e2e907ea3df4f",
From what I've found, it looks like GitHub uses SHA-1, so why don't the values match?
Do I need to apply some additional calculation to this hash to obtain the "local file hash"?
I have written a working example:
#include <curl/curl.h>
#include <openssl/evp.h>

#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
std::string compute_sha1_hash(const std::string& file_path)
{
std::ifstream file(file_path, std::ios::binary);
if (!file) {
throw std::runtime_error("Could not open file");
}
EVP_MD_CTX* mdctx = EVP_MD_CTX_new();
if (mdctx == NULL) {
throw std::runtime_error("Failed to create EVP_MD_CTX");
}
if (EVP_DigestInit_ex(mdctx, EVP_sha1(), NULL) != 1) {
throw std::runtime_error("Failed to initialize SHA-1 context");
}
std::vector<char> buffer(4096);
while (file.read(buffer.data(), buffer.size())) {
if (EVP_DigestUpdate(mdctx, buffer.data(), file.gcount()) != 1) {
throw std::runtime_error("Failed to update SHA-1 context");
}
}
if (file.gcount() > 0) {
if (EVP_DigestUpdate(mdctx, buffer.data(), file.gcount()) != 1) {
throw std::runtime_error("Failed to update SHA-1 context");
}
}
std::vector<unsigned char> hash_value(EVP_MD_size(EVP_sha1()));
unsigned int digest_len;
if (EVP_DigestFinal_ex(mdctx, hash_value.data(), &digest_len) != 1) {
throw std::runtime_error("Failed to finalize SHA-1 context");
}
EVP_MD_CTX_free(mdctx);
// Convert to hex
std::ostringstream oss;
for (const auto& byte : hash_value) {
oss << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>(byte);
}
return oss.str();
}
// Brian613542705's answer
std::string compute_sha1_hash_2(const std::string& file_path)
{
std::ifstream file(file_path, std::ios::binary);
if (!file) {
throw std::runtime_error("Could not open file");
}
// Read the content of the file
std::ostringstream oss;
oss << file.rdbuf();
std::string content = oss.str();
// Prepend "blob " and the size of the content
std::string preparedContent = "blob " + std::to_string(content.size()) + "\0" + content;
EVP_MD_CTX* mdctx = EVP_MD_CTX_new();
if (mdctx == NULL) {
throw std::runtime_error("Failed to create EVP_MD_CTX");
}
if (EVP_DigestInit_ex(mdctx, EVP_sha1(), NULL) != 1) {
throw std::runtime_error("Failed to initialize SHA-1 context");
}
if (EVP_DigestUpdate(mdctx, preparedContent.c_str(), preparedContent.size()) != 1) {
throw std::runtime_error("Failed to update SHA-1 context");
}
std::vector<unsigned char> hash_value(EVP_MD_size(EVP_sha1()));
unsigned int digest_len;
if (EVP_DigestFinal_ex(mdctx, hash_value.data(), &digest_len) != 1) {
throw std::runtime_error("Failed to finalize SHA-1 context");
}
EVP_MD_CTX_free(mdctx);
// Convert to hex
std::ostringstream sha1oss;
for (const auto& byte : hash_value) {
sha1oss << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>(byte);
}
return sha1oss.str();
}
// Write callback handed to libcurl via CURLOPT_WRITEFUNCTION.
// `userp` must point at the std::string that collects the response; each
// call appends `size * nmemb` bytes from `contents` to it and returns that
// same byte count.
static size_t WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp)
{
    const size_t byte_count = size * nmemb;
    std::string* const sink = static_cast<std::string*>(userp);
    const char* const chunk = static_cast<const char*>(contents);
    sink->append(chunk, byte_count);
    return byte_count;
}
void curl(std::string& data, const char* url)
{
CURL* curl_handle = curl_easy_init();
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
curl_easy_setopt(curl_handle, CURLOPT_TCP_KEEPALIVE, 0);
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &data);
curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L); // redirects
CURLcode res = curl_easy_perform(curl_handle);
if(res != CURLE_OK)
std::cerr << "curl_easy_perform failed: " << curl_easy_strerror(res) << '\n';
curl_easy_cleanup(curl_handle);
curl_global_cleanup();
}
/// Hashes the local test file the way Git hashes blobs, fetches the
/// repository tree from the GitHub API, and reports whether the local hash
/// equals the "sha" extracted from the response.
int main()
{
    // The "sha" GitHub reports for a blob is the Git object id (SHA-1 over a
    // "blob SIZE\0" header plus the content), not the SHA-1 of the raw file.
    // Compare against the Git-style hash; the plain digest can never match.
    const std::string fileHash = compute_sha1_hash_2("C:/Users/jabu/test.txt");

    std::string jsonString;
    curl(jsonString, "https://api.github.com/repos/jajabu33/test/git/trees/main?recursive=1");

    // Quick-and-dirty extraction, just to compare the strings: scan every
    // "sha" field in the JSON and keep the last one found.
    const std::regex shaPattern("\"sha\":\\s*\"([a-fA-F0-9]+)\"");
    std::string githubHash;
    for (std::sregex_iterator it(jsonString.begin(), jsonString.end(), shaPattern), end;
         it != end; ++it)
    {
        const std::smatch& match = *it;
        if (match.size() >= 2)
            githubHash = match[1].str();
    }

    if (fileHash == githubHash)
        std::cout << "Hashes are equal\n";
    else
        std::cout << "Hashes are not equal\n";
    return 0;
}
EDIT
I have added the function compute_sha1_hash_2, which does what Brian61354270 suggested in their answer, but the hashes still don't match.