import sqlite3
from sqlite3 import Error
[docs]class Database:
"""
Class for database. Accepts a name to create a new database, and use the database based off of that name.
"""
def __init__(self, database_name="c_source", default_tables=True):
"""
Creates a database connection and default tables if enabled.
:param database_name: the database name to create or use.
:param default_tables: whether to have the default tables or user create their own.
"""
# create a database connection or the database if not found
self.database_name = database_name # database name to create
self.conn = self.create_connection(self.database_name) # connection
self.sql_transaction = [] # transaction builder
self.offset = 0
# check if we have the connection and assign the cursor
if self.conn is not None:
self.cursor = self.conn.cursor()
# if setting it up by defaults to True, if false, then user must create own database
if default_tables:
# IMPORTANT: dates must be in format YYYY-MM-DD
sql_create_meta_table = """CREATE TABLE IF NOT EXISTS meta_table (
author_repo_key text PRIMARY KEY NOT NULL,
repo_name text NOT NULL,
license text,
url text UNIQUE,
author text NOT NULL,
filter_approval_date date NOT NULL,
llvm_gen_date date NOT NULL,
filter_date date NOT NULL,
compilation_date date NOT NULL,
master_download_date date NOT NULL
);"""
sql_create_ml_table = """CREATE TABLE IF NOT EXISTS ml_table (
file_path text PRIMARY KEY NOT NULL,
author_repo_key text NOT NULL,
source_code text UNIQUE NOT NULL,
cleaned_source_code text UNIQUE NOT NULL,
object_file blob UNIQUE NOT NULL,
elf blob UNIQUE NOT NULL,
llvm_unop text UNIQUE NOT NULL,
llvm_op text UNIQUE NOT NULL,
assembly text UNIQUE NOT NULL,
FOREIGN KEY (author_repo_key) REFERENCES meta_table (author_repo_key) ON DELETE CASCADE ON UPDATE CASCADE
);"""
# test if connection was successful
if self.conn is not None:
# create meta table
self.create_table(sql_create_meta_table)
# create ML table
self.create_table(sql_create_ml_table)
else:
print("Error: Cannot create the database connection. Does it exist? Right directory?")
[docs] def create_connection(self, db_file):
"""
create a database connection to the SQLite database specified by db_file
:param db_file: database file
:return: Connection object or None
"""
try:
self.conn = sqlite3.connect("{}.db".format(db_file))
return self.conn
except Error as e:
print(e)
return None
[docs] def create_table(self, create_table_sql):
"""
create a table from the create_table_sql statement
:param create_table_sql: a CREATE TABLE statement
:return: bool or nothing.
"""
try:
self.cursor.execute(create_table_sql)
return True
except Error as e:
print(e)
[docs] def find_repo(self, repo):
"""
finds a repo by the name, and get all its files
:param repo: the repository
:return: sql info or none
"""
try:
# get the repo and its files
sql = "SELECT * FROM ml_table WHERE author_repo_key = '{}'".format(repo)
self.cursor.execute(sql)
# after selecting, get all matching files
result = self.cursor.fetchall()
# check if result is there after fetching
if result is not None:
return result # return all results, for just one use result[0]
else:
return False
except Exception as e:
print("find_parent", str(e))
return False
[docs] def find_file(self, file_name):
"""
finds a file by the name, and get all its files
:param file_name: the info for one file.
:return: sql info or none
"""
try:
# get the repo and its files
sql = "SELECT * FROM ml_table WHERE file_path = '{}'".format(file_name)
self.cursor.execute(sql)
# after selecting, get all matching files
result = self.cursor.fetchall()
# check if result is there after fetching
if result is not None:
return result # return all results, for just one use result[0]
else:
return False
except Exception as e:
print("find_parent", str(e))
return False
[docs] def insert_ml(self, ml_tuple, override=False):
"""
inserts or replaces, if it exists, a new ml row by adding it to the transaction builder. Can override.
:param ml_tuple: file_path, author_repo_key, source_code, cleaned_source_code, object_file, llvm_unop, and llvm_op
:type: tuple - str (not null), str (not null), str (not null), str (not null), binary (not null), str (not null), str (not null),
binary (not null), str (not null)
:param override: to override the transaction to immediately process the query or not.
:type: bool
:return: bool or nothing
"""
try:
sql = "REPLACE INTO ml_table(file_path, author_repo_key, source_code, cleaned_source_code, object_file, elf, llvm_unop, llvm_op, assembly) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
self.__transaction_builder(sql, ml_tuple, override)
return True
except Error as e:
print(e)
return False
[docs] def delete_ml(self, file_name):
"""
delete ml data with this file name.
:param file_name: the file name to delete.
:param override: whether or not to override the transaction builder. Useful if we want it immediately
or not going to have greater than 1000 transactoins.
:return: nothing
"""
try:
sql = """DELETE FROM ml_table WHERE file_path = '{}'""".format(file_name)
self.cursor.execute(sql)
return True
except Error as e:
print(e)
return False
[docs] def get_repos(self, limit=100):
"""
gets 100 random repos, or the number provided
:param limit: the number of random repos to get
:return:
"""
try:
sql = """SELECT * FROM ml_table ORDER BY RANDOM() LIMIT '{}'""".format(limit)
self.cursor.execute(sql)
# after selecting, get all matching files
result = self.cursor.fetchall()
# check if result is there after fetching
if result is not None:
return result # return all results, for just one use result[0]
else:
return False
except Error as e:
print(e)
return False
def __transaction_builder(self, sql_query, sql_values, override=False):
"""
populates the database using transactions instead of 1 by 1
:param sql_query: the sql query
:param sql_values: the info to add to the sql info.
:param override: override the > 1000 transaction builder.
:return: nothing
"""
# create new object
sql = {"sql_query": sql_query, "sql_values": sql_values}
# then add and check the length
self.sql_transaction.append(sql)
if len(self.sql_transaction) >= 50 or override:
print("Beginning transaction... inserting into database...")
# begin transaction, insert 1000
self.cursor.execute("BEGIN TRANSACTION")
for s in self.sql_transaction:
try:
self.cursor.execute(s["sql_query"], s["sql_values"]) # execute the transaction
# print('executing transactions')
except Error as e:
print("error: ", str(e))
pass
self.conn.commit() # commit once done
self.sql_transaction = [] # empty it out