#include "stdafx.h"

#include <stdio.h>
#include <stdlib.h>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

#include <windows.h>
//#include <Dbghelp.h> //ImageRvaToVa
#include <imagehlp.h>
#include <io.h>

#include "mysql.h"

#pragma comment (lib, "imagehlp.lib")

using namespace std;

// ---------------------------------------------------------------------------
// File-level state shared between getapi() and populateTable().
// ---------------------------------------------------------------------------

// Not used by the code in this file; kept so that existing references elsewhere
// keep compiling.
string s1, s2;
char * cc;

// Imported function names of the file most recently parsed by getapi().
// Only entries [0, snum) are valid, and only until the next getapi() call.
char * c2[5000000];

//Counter for total API calls; used as the ID of the next API_CALLS row
int myID = 1;

// Not used by the code in this file; kept for compatibility.
struct aa
{
    char * ch;
}aa[500];

// Number of valid entries in c2.
int snum = 0;

namespace {

// Capacity of c2, so the parser can never write past its end.
const size_t kMaxApiNames = sizeof(c2) / sizeof(c2[0]);

// Backing storage for the strings c2 points at. The names are copied out of the
// mapped file so that the mapping can be released before getapi() returns.
std::vector<char> g_apiNameBytes;

// Outcome of walking a PE import table.
enum ImportScanResult
{
    kScanOk,         // table walked to its terminator
    kScanRejected,   // a DLL or function name matched a "disregard this file" rule
    kScanMalformed   // an RVA or string pointed outside the file
};

// Owns the file handle, the mapping handle and the mapped view of one file and
// releases all three in its destructor, whichever way the caller leaves.
class MappedFile
{
public:
    MappedFile()
        : file_(INVALID_HANDLE_VALUE), mapping_(NULL), base_(NULL), size_(0)
    {
    }

    ~MappedFile()
    {
        if (base_ != NULL)
            UnmapViewOfFile(base_);
        if (mapping_ != NULL)
            CloseHandle(mapping_);
        if (file_ != INVALID_HANDLE_VALUE)
            CloseHandle(file_);
    }

    // Opens and maps `path` read-only. Prints the reason and returns false on
    // failure.
    bool open(const char* path)
    {
        // The path is already an ANSI string, so the ANSI entry point is called
        // directly instead of converting into a fixed-size wide buffer.
        file_ = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL,
                            OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
        if (file_ == INVALID_HANDLE_VALUE)
        {
            printf("Create File Failed, skipping this file.\n");
            return false;
        }

        LARGE_INTEGER fileSize;
        if (!GetFileSizeEx(file_, &fileSize))
        {
            printf("Could not read file size (%lu), skipping this file.\n", GetLastError());
            return false;
        }
        size_ = static_cast<ULONGLONG>(fileSize.QuadPart);

        // CreateFileMapping reports failure with NULL.
        mapping_ = CreateFileMapping(file_, NULL, PAGE_READONLY, 0, 0, NULL);
        if (mapping_ == NULL)
        {
            printf("Could not create file mapping object (%lu), skipping this file.\n", GetLastError());
            return false;
        }

        base_ = static_cast<LPBYTE>(MapViewOfFile(mapping_, FILE_MAP_READ, 0, 0, 0));
        if (base_ == NULL)
        {
            printf("Could not map view of file (%lu), skipping this file.\n", GetLastError());
            return false;
        }
        return true;
    }

    LPBYTE base() const { return base_; }
    ULONGLONG size() const { return size_; }

private:
    // Non-copyable: two owners would release the handles twice.
    MappedFile(const MappedFile&);
    MappedFile& operator=(const MappedFile&);

    HANDLE    file_;
    HANDLE    mapping_;
    LPBYTE    base_;
    ULONGLONG size_;
};

// A PE image mapped as a flat file (not loaded by the OS loader).
struct PeImage
{
    LPBYTE            base;       // first byte of the mapped file
    ULONGLONG         size;       // number of bytes in the file
    PIMAGE_NT_HEADERS nt;         // NT headers, passed to ImageRvaToVa
    bool              is64;       // true for PE32+ images
    DWORD             importRva;  // RVA of the import directory, 0 if absent
};

// Translates an RVA to a pointer inside the mapped file and checks that
// `needed` bytes starting there lie inside the file. Returns NULL otherwise.
LPBYTE rvaToFilePtr(const PeImage& pe, ULONGLONG rva, ULONGLONG needed)
{
    if (rva == 0 || rva > 0xFFFFFFFFull)
        return NULL;

    LPBYTE p = static_cast<LPBYTE>(
        ImageRvaToVa(pe.nt, pe.base, static_cast<ULONG>(rva), NULL));
    if (p == NULL || p < pe.base)
        return NULL;

    const ULONGLONG offset = static_cast<ULONGLONG>(p - pe.base);
    if (offset >= pe.size || needed > pe.size - offset)
        return NULL;
    return p;
}

// Translates an RVA to a C string and checks that its terminator lies inside
// the file. Returns NULL otherwise.
const char* rvaToCString(const PeImage& pe, ULONGLONG rva)
{
    LPBYTE p = rvaToFilePtr(pe, rva, 1);
    if (p == NULL)
        return NULL;

    const size_t available =
        static_cast<size_t>(pe.size - static_cast<ULONGLONG>(p - pe.base));
    if (memchr(p, '\0', available) == NULL)
        return NULL;
    return reinterpret_cast<const char*>(p);
}

// Validates the DOS/NT headers and the section table of a mapped file and
// fills `pe`. Returns false when the file is not a PE32 / PE32+ image.
bool parsePeHeaders(LPBYTE base, ULONGLONG size, PeImage& pe)
{
    if (size < sizeof(IMAGE_DOS_HEADER))
        return false;

    const IMAGE_DOS_HEADER* dos = reinterpret_cast<const IMAGE_DOS_HEADER*>(base);
    if (dos->e_magic != IMAGE_DOS_SIGNATURE || dos->e_lfanew <= 0)
        return false;

    const ULONGLONG ntOffset = static_cast<ULONGLONG>(dos->e_lfanew);
    if (ntOffset > size || sizeof(IMAGE_NT_HEADERS32) > size - ntOffset)
        return false;

    // Signature, FileHeader and OptionalHeader.Magic sit at the same offsets in
    // the 32- and 64-bit layouts, so the 32-bit view is safe for reading them.
    PIMAGE_NT_HEADERS32 nt32 = reinterpret_cast<PIMAGE_NT_HEADERS32>(base + ntOffset);
    if (nt32->Signature != IMAGE_NT_SIGNATURE)
        return false;

    // ImageRvaToVa walks the section table, so it has to lie inside the file.
    const ULONGLONG sectionTableOffset = ntOffset
        + FIELD_OFFSET(IMAGE_NT_HEADERS32, OptionalHeader)
        + nt32->FileHeader.SizeOfOptionalHeader;
    const ULONGLONG sectionTableBytes =
        static_cast<ULONGLONG>(nt32->FileHeader.NumberOfSections) * sizeof(IMAGE_SECTION_HEADER);
    if (sectionTableOffset > size || sectionTableBytes > size - sectionTableOffset)
        return false;

    const IMAGE_DATA_DIRECTORY* directories = NULL;
    DWORD directoryCount = 0;

    if (nt32->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC)
    {
        pe.is64 = false;
        directories = nt32->OptionalHeader.DataDirectory;
        directoryCount = nt32->OptionalHeader.NumberOfRvaAndSizes;
    }
    else if (nt32->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC)
    {
        if (sizeof(IMAGE_NT_HEADERS64) > size - ntOffset)
            return false;
        PIMAGE_NT_HEADERS64 nt64 = reinterpret_cast<PIMAGE_NT_HEADERS64>(base + ntOffset);
        pe.is64 = true;
        directories = nt64->OptionalHeader.DataDirectory;
        directoryCount = nt64->OptionalHeader.NumberOfRvaAndSizes;
    }
    else
    {
        return false;
    }

    pe.base = base;
    pe.size = size;
    pe.nt = reinterpret_cast<PIMAGE_NT_HEADERS>(base + ntOffset);
    pe.importRva = (directoryCount > IMAGE_DIRECTORY_ENTRY_IMPORT)
        ? directories[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress
        : 0;
    return true;
}

// DLLs whose presence makes the whole file be disregarded. DLL names are
// compared without regard to case.
bool isRejectedDllName(const char* dllName)
{
    return _stricmp(dllName, "MSVBVM60.DLL") == 0
        || _stricmp(dllName, "MSVCP110D.DLL") == 0;
}

// Imported names that make the whole file be disregarded: two specific names
// and any name starting with '?' or '@'.
bool isRejectedApiName(const char* apiName)
{
    // Compare contents; comparing a pointer with a string literal never matches.
    if (strcmp(apiName, "_CIcos") == 0)
        return true;
    if (strcmp(apiName, "SOFTWARE\\Borland\\Delphi\\RTL") == 0)
        return true;
    return apiName[0] == '?' || apiName[0] == '@';
}

// Walks one thunk array (IMAGE_THUNK_DATA32 or IMAGE_THUNK_DATA64) and appends
// every import-by-name to `names`. Imports by ordinal carry no name and are
// skipped.
template <typename ThunkData>
ImportScanResult collectNamesFromThunks(const PeImage& pe, DWORD firstThunkRva,
                                        ULONGLONG ordinalFlag,
                                        std::vector<std::string>& names)
{
    for (ULONGLONG rva = firstThunkRva; ; rva += sizeof(ThunkData))
    {
        const ThunkData* thunk = reinterpret_cast<const ThunkData*>(
            rvaToFilePtr(pe, rva, sizeof(ThunkData)));
        if (thunk == NULL)
            return kScanMalformed;

        const ULONGLONG entry = thunk->u1.AddressOfData;
        if (entry == 0)
            return kScanOk;            // all-zero thunk terminates the array
        if (entry & ordinalFlag)
            continue;

        // The entry is the RVA of an IMAGE_IMPORT_BY_NAME; the name follows the hint.
        const char* apiName =
            rvaToCString(pe, entry + FIELD_OFFSET(IMAGE_IMPORT_BY_NAME, Name));
        if (apiName == NULL)
            return kScanMalformed;
        if (isRejectedApiName(apiName))
            return kScanRejected;

        if (names.size() >= kMaxApiNames)
            return kScanOk;            // c2 cannot hold any more names
        names.push_back(apiName);
    }
}

// Walks the import directory of `pe` and appends every imported function name
// to `names`.
ImportScanResult collectImportNames(const PeImage& pe, std::vector<std::string>& names)
{
    IMAGE_IMPORT_DESCRIPTOR nullDescriptor;
    memset(&nullDescriptor, 0, sizeof(nullDescriptor));

    for (ULONGLONG rva = pe.importRva; ; rva += sizeof(IMAGE_IMPORT_DESCRIPTOR))
    {
        const IMAGE_IMPORT_DESCRIPTOR* descriptor =
            reinterpret_cast<const IMAGE_IMPORT_DESCRIPTOR*>(
                rvaToFilePtr(pe, rva, sizeof(IMAGE_IMPORT_DESCRIPTOR)));
        if (descriptor == NULL)
            return kScanMalformed;
        if (memcmp(descriptor, &nullDescriptor, sizeof(nullDescriptor)) == 0)
            return kScanOk;            // all-zero descriptor terminates the table

        const char* dllName = rvaToCString(pe, descriptor->Name);
        if (dllName == NULL)
            return kScanMalformed;
        if (isRejectedDllName(dllName))
            return kScanRejected;

        // Prefer the import lookup table: the IAT (FirstThunk) holds addresses
        // rather than name RVAs once an image has been bound. Some linkers leave
        // OriginalFirstThunk at 0, in which case the IAT is the only table.
        const DWORD thunkRva = (descriptor->OriginalFirstThunk != 0)
            ? descriptor->OriginalFirstThunk
            : descriptor->FirstThunk;

        const ImportScanResult result = pe.is64
            ? collectNamesFromThunks<IMAGE_THUNK_DATA64>(pe, thunkRva, IMAGE_ORDINAL_FLAG64, names)
            : collectNamesFromThunks<IMAGE_THUNK_DATA32>(pe, thunkRva, IMAGE_ORDINAL_FLAG32, names);
        if (result != kScanOk)
            return result;
    }
}

} // namespace

// Extracts the names of all functions imported by name by the PE file at
// `szStr` and publishes them through the globals c2 / snum.
//
// szStr: ANSI path of the file to inspect.
//
// On return snum is the number of names and c2[0..snum) point at them. snum is
// 0 when the file cannot be opened or mapped, is not a PE32/PE32+ image, has no
// or a malformed import table, or matches one of the "disregard" rules
// (isRejectedDllName / isRejectedApiName). The c2 pointers stay valid until the
// next call to getapi().
void getapi(char* szStr)
{
    // Start from a clean slate so a skipped file never exposes names that
    // belong to a previous file or to a half-parsed import table.
    snum = 0;
    g_apiNameBytes.clear();

    if (szStr == NULL)
        return;

    MappedFile file;
    if (!file.open(szStr))
        return;

    PeImage pe;
    if (!parsePeHeaders(file.base(), file.size(), pe))
    {
        printf("Not a valid PE file, skipping this file.\n");
        return;
    }

    //If we can't find the import table then get out of here
    if (pe.importRva == 0)
    {
        printf("no import table, skipping this file.\n");
        return;
    }

    std::vector<std::string> names;
    const ImportScanResult result = collectImportNames(pe, names);
    if (result == kScanMalformed)
    {
        printf("Malformed import table, skipping this file.\n");
        return;
    }
    if (result == kScanRejected)
        return;

    // Copy the names out of the mapping (it is released when `file` goes out of
    // scope) into one flat buffer. The c2 pointers are taken only after the
    // buffer has stopped growing, so they are not invalidated by reallocation.
    std::vector<size_t> offsets;
    offsets.reserve(names.size());
    for (size_t k = 0; k < names.size(); ++k)
    {
        offsets.push_back(g_apiNameBytes.size());
        g_apiNameBytes.insert(g_apiNameBytes.end(), names[k].begin(), names[k].end());
        g_apiNameBytes.push_back('\0');
    }
    for (size_t k = 0; k < offsets.size(); ++k)
        c2[k] = &g_apiNameBytes[offsets[k]];

    snum = static_cast<int>(offsets.size());
}

// Reports a file that the classifier considers malicious.
//
// filepath: directory that was scanned.
// name:     name of the file inside that directory.
void identifyMalicious(char filepath[], char name[]){
    printf("Discovered Malicious File: %s\n", name);
    printf("In Directory: %s\n\n", filepath);
}

namespace {

// Values of populateTable's `table` parameter.
enum
{
    kModePopulateApiCalls = 1,   // insert every imported API name into API_CALLS
    kModePopulateFileDb   = 2,   // insert one signature row per file into File_DB
    kModeScan             = 3    // classify each file against File_DB
};

// Escapes `raw` for use inside a single-quoted SQL string literal.
// Returns false if the client library reports an error.
bool escapeForSql(MYSQL* ptr, const std::string& raw, std::string& escaped)
{
    // mysql_real_escape_string needs room for 2 * length + 1 bytes.
    std::vector<char> buffer(raw.size() * 2 + 1);
    const unsigned long written = mysql_real_escape_string(
        ptr, &buffer[0], raw.data(), static_cast<unsigned long>(raw.size()));
    if (written == static_cast<unsigned long>(-1))
        return false;

    escaped.assign(&buffer[0], written);
    return true;
}

// Sends one statement to the server. Returns true on success.
bool executeSql(MYSQL* ptr, const std::string& sql)
{
    return mysql_real_query(ptr, sql.c_str(), static_cast<unsigned long>(sql.size())) == 0;
}

// Runs a SELECT and stores the first column of every row in `values`.
// Prints the failure and returns false if the query or the fetch fails.
bool fetchFirstColumn(MYSQL* ptr, const std::string& sql, std::vector<std::string>& values)
{
    values.clear();

    if (!executeSql(ptr, sql))
    {
        printf("Query failed\n");
        return false;
    }

    MYSQL_RES* result = mysql_use_result(ptr);
    if (result == NULL)
    {
        printf("Fetch failed\n");
        return false;
    }

    MYSQL_ROW row;
    while ((row = mysql_fetch_row(result)) != NULL)
    {
        if (row[0] != NULL)
            values.push_back(row[0]);
    }
    mysql_free_result(result);
    return true;
}

// Runs a "select count(*) ..." style query and stores its value in `count`.
bool fetchCount(MYSQL* ptr, const std::string& sql, double& count)
{
    std::vector<std::string> values;
    if (!fetchFirstColumn(ptr, sql, values) || values.empty())
        return false;

    count = atoi(values.back().c_str());
    return true;
}

// Inserts every name in c2[0..snum) into API_CALLS, using and advancing myID.
void insertApiCalls(MYSQL* ptr)
{
    for (int i = 0; i < snum; i++)
    {
        std::string api;
        if (!escapeForSql(ptr, c2[i], api))
            continue;

        char idText[32];
        sprintf_s(idText, "%d", myID);

        const std::string sql = std::string("insert into API_CALLS (ID, API) values (")
            + idText + ",'" + api + "')";

        //Only count the API when the insert went through
        if (executeSql(ptr, sql))
            myID++;
    }
}

// Looks up the API_CALLS ID of every name in c2[0..snum) and returns the
// distinct IDs in first-seen order. Names that are not in the table contribute
// nothing.
std::vector<std::string> collectSignatureIds(MYSQL* ptr)
{
    std::vector<std::string> ids;
    // Whole-ID membership test; a substring search would treat "1" as already
    // present once "10" has been added.
    std::set<std::string> seen;

    for (int i = 0; i < snum; i++)
    {
        std::string api;
        if (!escapeForSql(ptr, c2[i], api))
        {
            printf("Query failed\n");
            continue;
        }

        std::vector<std::string> rows;
        if (!fetchFirstColumn(ptr, "select ID from API_CALLS where API = '" + api + "'", rows))
            continue;

        for (size_t r = 0; r < rows.size(); ++r)
        {
            if (seen.insert(rows[r]).second)
                ids.push_back(rows[r]);
        }
    }
    return ids;
}

// Joins IDs into the comma-separated signature stored in File_DB.
std::string joinIds(const std::vector<std::string>& ids)
{
    std::string signature;
    for (size_t k = 0; k < ids.size(); ++k)
    {
        if (k != 0)
            signature += ",";
        signature += ids[k];
    }
    return signature;
}

// Inserts the signature row of one file into File_DB.
void insertFileSignature(MYSQL* ptr, int ref, const char* fileName, const std::string& signature)
{
    std::string escapedName;
    std::string escapedSignature;
    if (!escapeForSql(ptr, fileName, escapedName)
        || !escapeForSql(ptr, signature, escapedSignature))
    {
        printf("signature insert failed");
        return;
    }

    // ref is stored as 0 when the caller passed 0 and as -1 otherwise.
    const std::string sql =
        "insert into File_DB (File_name, ref, signature) values ('" + escapedName + "', "
        + (ref == 0 ? "0" : "-1") + ", '" + escapedSignature + "')";

    if (!executeSql(ptr, sql))
        printf("signature insert failed");
}

// Naive Bayes decision over the IDs of one file: for every ID, the share of
// File_DB rows containing it that have ref != 0 (malicious) versus ref == 0
// (benign). Returns true when the malicious likelihood is at least the benign
// one. Returns false when no ID yields any evidence.
bool looksMalicious(MYSQL* ptr, const std::vector<std::string>& ids)
{
    // Sums of logarithms instead of a running product, which underflows to 0
    // on both sides for files with many imports.
    double logMalicious = 0.0;
    double logBenign = 0.0;
    size_t idsUsed = 0;

    for (size_t k = 0; k < ids.size(); ++k)
    {
        std::string id;
        if (!escapeForSql(ptr, ids[k], id))
            continue;

        // FIND_IN_SET matches whole elements of the comma-separated list.
        const std::string membership = "FIND_IN_SET('" + id + "', signature) > 0";

        double total = 0.0;
        double malicious = 0.0;
        if (!fetchCount(ptr, "select count(*) from FILE_DB where " + membership, total))
            continue;
        if (!fetchCount(ptr, "select count(*) from FILE_DB where ref != 0 and " + membership, malicious))
            continue;

        // An ID that occurs in no signature carries no evidence (and would
        // otherwise divide by zero).
        if (total <= 0.0)
            continue;

        double pMalicious = malicious / total;
        double pBenign = (total - malicious) / total;

        // Keep a single ID from forcing either likelihood to exactly zero.
        if (malicious <= 0.0)
        {
            pMalicious = 0.00001;
            pBenign = 0.99999;
        }
        else if (malicious >= total)
        {
            pBenign = 0.00001;
            pMalicious = 0.99999;
        }

        logMalicious += std::log(pMalicious);
        logBenign += std::log(pBenign);
        ++idsUsed;
    }

    return idsUsed != 0 && logMalicious >= logBenign;
}

// Runs the action selected by `table` on one file of the scanned directory.
void processFile(MYSQL* ptr, int table, int ref, char filepath[],
                 const std::string& directory, char fileName[])
{
    if (table == kModePopulateApiCalls)
        printf("Extracting API calls from file: %s\n", fileName);
    else if (table == kModePopulateFileDb)
        printf("Creating API signature for file: %s\n", fileName);
    else
        printf("Scanning file: %s\n", fileName);

    // getapi takes a mutable char*, so hand it a private copy of the path.
    const std::string fullPath = directory + fileName;
    std::vector<char> pathBuffer(fullPath.begin(), fullPath.end());
    pathBuffer.push_back('\0');

    //Use the getapi method to populate c2 with all of the APIs
    getapi(&pathBuffer[0]);

    if (table == kModePopulateApiCalls)
    {
        insertApiCalls(ptr);
        return;
    }
    if (table != kModePopulateFileDb && table != kModeScan)
        return;

    const std::vector<std::string> ids = collectSignatureIds(ptr);
    if (snum == 0)
        return;

    if (table == kModePopulateFileDb)
        insertFileSignature(ptr, ref, fileName, joinIds(ids));
    else if (looksMalicious(ptr, ids))
        identifyMalicious(filepath, fileName);
}

} // namespace

// Processes every regular file directly inside `filepath`.
//
// ptr:      open MySQL connection; nothing is done when it is NULL.
// table:    1 = insert each imported API name into API_CALLS,
//           2 = insert one signature row per file into File_DB,
//           3 = classify each file and report the malicious ones.
// ref:      only used when table == 2: stored as 0 when ref == 0, else as -1.
// filepath: directory to process; a trailing path separator is added if missing.
void populateTable(MYSQL *ptr, int table, int ref, char filepath[]){
    if (ptr == NULL || filepath == NULL)
    {
        printf("didnt find files\n");
        return;
    }

    std::string directory = filepath;
    if (!directory.empty())
    {
        const char last = directory[directory.size() - 1];
        if (last != '\\' && last != '/')
            directory += '\\';
    }

    const std::string pattern = directory + "*";

    struct _finddata_t fileinfo;
    // _findfirst returns intptr_t; a long would truncate the handle on x64.
    const intptr_t handle = _findfirst(pattern.c_str(), &fileinfo);

    //If we can't find the directory
    if (handle == -1)
    {
        printf("didnt find files\n");
        return;
    }

    do
    {
        // Skips "." and ".." by attribute instead of assuming they are the
        // first two entries, and skips real subdirectories as well.
        if (fileinfo.attrib & _A_SUBDIR)
            continue;

        processFile(ptr, table, ref, filepath, directory, fileinfo.name);
    } while (_findnext(handle, &fileinfo) == 0);

    _findclose(handle);
}

// Entry point: fills API_CALLS and File_DB from the training directories, then
// scans the test directory.
int _tmain(int argc, _TCHAR* argv[])
{
    //MySQL connection stuff
    MYSQL *conn_ptr = mysql_init(NULL);

    //If MySQL initiation fails then return
    if(!conn_ptr){
        printf("mysql_init_failed\n");
        return EXIT_FAILURE;
    }

    //Attempt to connect to the local MySQL server.
    // NOTE(review): credentials are hard-coded; consider reading them from configuration.
    // The handle from mysql_init is kept in conn_ptr even on failure so that
    // mysql_error can read from it and mysql_close can release it.
    if(mysql_real_connect(conn_ptr,"localhost","root","1234","test",0,NULL,0) == NULL){
        printf("Error connecting to database: %s\n", mysql_error(conn_ptr));
        mysql_close(conn_ptr);
        return EXIT_FAILURE;
    }
    printf("Connection success\n");
    //End of MySQL connection stuff

    // populateTable takes char[], so the directory names live in writable arrays.
    char maliciousDir[] = "C:\\481_malicious\\";
    char benignDir[]    = "C:\\481_benign\\";
    char testDir[]      = "C:\\Test_Files\\";

    //For demonstrational purposes we will start with empty tables and populate them
    //Populate API_CALL Table with all APIs found
    populateTable(conn_ptr, 1, 0, maliciousDir);
    populateTable(conn_ptr, 1, 0, benignDir);
    populateTable(conn_ptr, 1, 0, testDir);

    //Populate FILE_DB Table with malicious signature and benign signatures
    populateTable(conn_ptr, 2, -1, maliciousDir);
    populateTable(conn_ptr, 2, 0, benignDir);

    //Scan a directory and find all malicious files
    populateTable(conn_ptr, 3, 999, testDir);

    mysql_close(conn_ptr);

    system("pause");
    return 0;
}