For do this experiment I made both algorithms in Python, then I put a sub procedure that repeat 30 times, change the value for how long are the pattern, the text and the amount of letters that we use to choose for have different prospective about how the algorithm works, it is important to know that for do the patters I use a function that do string random and also create a pattern randomly.
First at all, the following image you can see a map color where we can find 3 different values plotted, how long is the pattern, how long is the text that we check with the pattern and, finally I put a variable that checks the time that the computer wait for do the search, then when the algorithm finish I return this value and I used for have an estimation with the time.
In this image, you can see that if we have a pattern that are just for 1 to 4 letters, we have an algorithm that can do the search and in a time (that is plot in color) and there is a point that have the color almost red, which means that we have problems with the time to do the search of the pattern, but is just in some search, because in the following map of the next algorithm BM, we have more stress but down for the medium.
In this other map, we have that there is not stress as the another algorithm, because we do not have points where the time are up of the medium, but we have more points in purple, which means that we have a lot stress and up for medium.
This is another test, when we can see that BM algorithm is more purple, so, takes more time to search the words.
This is the code:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import string | |
import time | |
def genera_str(tam, lista): | |
return ''.join(random.choice(lista) for x in range(tam)) | |
def algoritmo_KMP(texto, patron): | |
inicio = time.time() | |
tam_texto = len(texto) | |
tam_patron = len(patron) | |
M = 0 | |
index = 0 | |
if (tam_texto >= tam_patron): | |
tabla = [] | |
for i in range(tam_patron): | |
tabla.append(0) | |
cont = 0 | |
for i in range(1, tam_patron): | |
while cont > 0 and patron[i] != patron[cont]: | |
cont = tabla[cont - 1] | |
if patron[i] == patron[cont]: | |
cont += 1 | |
tabla[i] = cont | |
tabla.insert(0, -1) | |
Tfallos = tabla[:-1] | |
while ((index <= tam_patron) and (M < tam_texto)): | |
if (texto[M+index] == patron[index]): | |
if (index == (tam_patron - 1)): | |
fin = time.time() | |
tiempo = fin - inicio | |
return M, tiempo | |
index += 1 | |
else: | |
M = M + index - Tfallos[index] | |
if (index > 0): | |
index = Tfallos[index] | |
def algoritmo_BM(texto, patron): | |
inicio = time.time() | |
tam_patron = len(patron) | |
tam_texto = len(texto) | |
mapa = {} | |
for i in range(tam_patron-1, -1, -1): | |
inter = patron[i] | |
if (inter not in mapa): | |
mapa[inter] = i | |
iguales = [] | |
cabecera = 0 | |
while cabecera + (tam_patron - 1) < tam_texto: | |
for i_patron in range(tam_patron-1, -1, -1): | |
i_lista = cabecera + i_patron | |
cor_y = patron[i_patron] | |
cor_x = texto[i_lista] | |
if i_lista >= tam_texto: | |
break | |
if cor_y != cor_x: | |
derecho = mapa.get(cor_x) | |
if cor_x not in mapa: | |
cabecera = i_lista + 1 | |
else: | |
recorro = i_lista - (cabecera + derecho) | |
cabecera += (recorro > 0 and recorro or cabecera + 1) | |
break | |
elif i_patron == 0: | |
fin = time.time() | |
tiempo = fin - inicio | |
return cabecera, tiempo | |
def experimento(tam_cadena): | |
cadena = [] | |
for i in range(tam_cadena): | |
letra = random.choice(string.ascii_uppercase) | |
cadena.append(letra) | |
for tam_texto in range(10, 1000, 10): | |
for tam_patron in range(1,10): | |
for i in range(1,30): | |
main(tam_texto, tam_patron, cadena) | |
def main(tam_1, tam_2, letras): | |
T = genera_str(tam_1, letras) | |
P = genera_str(tam_2, letras) | |
try: | |
num_KMP, tiempo_KMP = algoritmo_KMP(T, P) | |
num_BM, tiempo_BM = algoritmo_BM(T, P) | |
if num_BM == None and tiempo_BM == None: | |
num_BM = 0 | |
tiempo_BM = 0.0 | |
if num_KMP == None and tiempo_KMP == None: | |
num_KMP = 0 | |
tiempo_BM = 0.0 | |
if num_BM != 0: | |
num_BM = 1 | |
if num_KMP != 0: | |
num_KMP = 1 | |
except: | |
num_KMP = 0 | |
num_BM = 0 | |
tiempo_BM = 0.0 | |
tiempo_KMP = 0.0 | |
print "%d %d %f %f %d" % (tam_1, tam_2, tiempo_KMP, tiempo_BM, len(letras),) | |
for i in range(1,10): | |
experimento(i) |
Plot in gnuplot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
set grid layerdefault | |
set term png | |
set title 'KMP' | |
set ylabel 'texto' | |
set xlabel 'patron' | |
set xrange[1:9] | |
set label "" | |
set output "grafica_2.png" | |
set key off | |
plot "datos.txt" using 2:1:3 with points palette pt 7 ps 3 | |
set grid layerdefault | |
set term png | |
set title 'BM' | |
set ylabel 'texto' | |
set xlabel 'patron' | |
set xrange[1:9] | |
set label "" | |
set output "grafica_3.png" | |
set key off | |
plot "datos.txt" using 2:1:4 with points palette pt 7 ps 3 |
Código 5. Es más fácil comparar si visualizas de la misma manera las dos cosas. Reporte 4.
ResponderEliminar