Commit 39511dba authored by Koosuri, Sai Akhilesh (Student)'s avatar Koosuri, Sai Akhilesh (Student)
Browse files

Update README.md

parent c1f8d58d
# Database Assignment 2
# sai akhilesh Koosuri
# Student ID: 12583943
# Assignment - 2
import os
import pandas
import numpy as np
import time
from IPython.display import display
import hashlib
import os.path
import csv
from csv import writer
# Split the dataset into consecutive files of at most ROWS_PER_FILE rows
# each: "0modified.csv", "1modified.csv", ... Files are written without a
# header row and without the index column.
ROWS_PER_FILE = 100000

# dataset.csv is expected to sit next to this script.
df = pandas.read_csv(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'dataset.csv'),
    encoding='utf-8')

# The previous `while True` loop had no exit condition and only stopped by
# raising KeyError once it ran past the last row. Walking over bounded
# slices terminates cleanly and writes each file in a single call instead
# of re-opening it once per row.
filecount = 0
for filecount, first_row in enumerate(range(0, len(df), ROWS_PER_FILE)):
    chunk = df.iloc[first_row:first_row + ROWS_PER_FILE]
    # mode='w': every chunk file is produced exactly once per run, so
    # overwriting keeps a rerun from appending duplicate rows.
    chunk.to_csv(str(filecount) + 'modified.csv',
                 index=False,
                 header=False,
                 mode='w')
# Merge Sort
####################################################################
# Number of "<n>modified.csv" chunk files to sort and merge.
NUM_CHUNKS = 12
# Positional index of the column the data is ordered by.
SORT_COLUMN = 26

# Phase 1: sort every chunk on its own and store it as "<n>sorted.csv".
for i in range(NUM_CHUNKS):
    X = pandas.read_csv(str(i) + 'modified.csv', encoding='utf-8', header=None)
    X = X.sort_values(by=X.columns[SORT_COLUMN], axis=0, ascending=True,
                      kind='mergesort', na_position='last')
    # header=False: these files are read back with header=None, so writing
    # the default header would turn the column labels (0, 1, 2, ...) into a
    # bogus first data row.
    X.to_csv(str(i) + 'sorted.csv', index=False, header=False,
             encoding='utf-8')

# Joining the sorted files
# Phase 2: load all sorted chunks, concatenate them once (instead of
# re-copying the growing frame on every iteration) and run a stable
# mergesort over the combined frame.
Z = pandas.concat(
    [pandas.read_csv(str(i) + 'sorted.csv', encoding='utf-8', header=None)
     for i in range(NUM_CHUNKS)],
    ignore_index=True)
Z = Z.sort_values(by=Z.columns[SORT_COLUMN], axis=0, ascending=True,
                  kind='mergesort', na_position='last')
# Written without a header for the same reason as above: the file is later
# loaded with header=None.
Z.to_csv('mergesorted.csv', index=False, header=False, encoding='utf-8')
# Linear Search
#############################################################
# Scan every row of Z in order and print each row whose value in column 25
# (the title column) equals the wanted title, then report the elapsed time.
title = "Sandman: Dream Hunters 30th Anniversary Edition"
# perf_counter is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
# Iterate the column directly rather than doing a scalar .iloc lookup per
# row; the scan is still a full linear pass over all rows.
for i, candidate in enumerate(Z.iloc[:, 25]):
    if candidate == title:
        print(Z.iloc[i, :])
end = time.perf_counter()
print(f"Runtime of the program is {end - start}")
# Hash Indexes
################################################################
# Number of bins; bin n is stored in the file "<n>.csv".
NUM_BINS = 10
# Positional index of the title column used as the hash key.
TITLE_COLUMN = 25


def _bin_for(title):
    """Return the bin number (0 .. NUM_BINS - 1) that *title* belongs to.

    A hashlib digest is used instead of the built-in hash(): hash() of a
    str is randomised per interpreter process, so bin files written by one
    run could not be looked up reliably by another run.
    """
    digest = hashlib.sha256(str(title).encode('utf-8')).digest()
    return int.from_bytes(digest[:8], 'big') % NUM_BINS


Z = pandas.read_csv('mergesorted.csv', encoding='utf-8', header=None)
# Compute the bin of every row once, then write each bin file in a single
# call. The previous version opened a file per row and guarded it with
# `exists == 'True'` (a bool compared with a str, always False), whose dead
# branch also passed invalid keyword arguments to open().
bins = Z.iloc[:, TITLE_COLUMN].map(_bin_for)
for value in range(NUM_BINS):
    bin_path = str(value) + '.csv'
    rows = Z[bins == value]
    if rows.empty:
        # Drop a stale bin left over from an earlier run so a later search
        # cannot read outdated rows from it.
        if os.path.exists(bin_path):
            os.remove(bin_path)
        continue
    # mode='w' rebuilds the bin from scratch; appending would duplicate
    # every row each time the script is rerun.
    rows.to_csv(bin_path, index=False, header=False, mode='w',
                encoding='utf-8')

# Hash Search
################################################
title = "Sandman: Dream Hunters 30th Anniversary Edition"
start = time.perf_counter()
value = _bin_for(title)
bin_path = str(value) + '.csv'
# A missing bin file means no row hashed to this bin, i.e. no match.
if os.path.exists(bin_path):
    # find the corresponding bin and load it as a df; header=None because
    # bin files carry no header row (the default would swallow the first
    # data row as column names and it could never match).
    X = pandas.read_csv(bin_path, encoding='utf-8', header=None)
    for i, candidate in enumerate(X.iloc[:, TITLE_COLUMN]):
        if candidate == title:
            print(X.iloc[i, :])  # prints the tuple with that title
end = time.perf_counter()
print(f"Runtime of the program is {end - start}")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment