SPECS: pldnotify.awk - get_links() optimization (even 6+ times faster on perl-*, but needs some testing)
qboosh
qboosh at pld-linux.org
Tue Dec 20 23:03:03 CET 2005
Author: qboosh Date: Tue Dec 20 22:03:03 2005 GMT
Module: SPECS Tag: HEAD
---- Log message:
- get_links() optimization (even 6+ times faster on perl-*, but needs some testing)
---- Files affected:
SPECS:
pldnotify.awk (1.52 -> 1.53)
---- Diffs:
================================================================
Index: SPECS/pldnotify.awk
diff -u SPECS/pldnotify.awk:1.52 SPECS/pldnotify.awk:1.53
--- SPECS/pldnotify.awk:1.52 Sat Oct 15 14:50:29 2005
+++ SPECS/pldnotify.awk Tue Dec 20 23:02:58 2005
@@ -138,7 +138,7 @@
return 0
}
-function get_links(url, errno,link,oneline,retval,odp,tmpfile) {
+function get_links(url, errno,link,oneline,retval,odp,wholeodp,lowerodp,tmpfile) {
# get all <A HREF=..> tags from specified URL
"mktemp /tmp/XXXXXX" | getline tmpfile
close("mktemp /tmp/XXXXXX")
@@ -154,30 +154,32 @@
if (errno==0) {
while (getline oneline < tmpfile)
- odp=(odp " " oneline)
- if ( DEBUG ) print "Response: " odp
+ wholeodp=(wholeodp " " oneline)
+ if ( DEBUG ) print "Response: " wholeodp
}
close(tmpfile)
system("rm -f " tmpfile)
urldir=url;
sub(/[^\/]+$/,"",urldir)
+
if ( errno==0) {
- while ((tolower(odp) ~ /<frame[ \t]/)||(tolower(odp) ~ /href=/)) {
- if (tolower(odp) ~ /<frame[ \t]/) {
- match(tolower(odp),/<frame[ \t][^>]*>/)
- ramka=substr(odp,RSTART,RLENGTH)
- odp=substr(odp,1,RSTART) substr(odp,RSTART+RLENGTH)
- sub(/[sS][rR][cC]=[ \t]*/,"src=",ramka);
- match(ramka,/src="[^"]+"/)
- newurl=substr(ramka,RSTART+5,RLENGTH-6)
+ while (match(wholeodp, /<([aA]|[fF][rR][aA][mM][eE])[ \t][^>]*>/) > 0) {
+ odp=substr(wholeodp,RSTART,RLENGTH);
+ wholeodp=substr(wholeodp,RSTART+RLENGTH);
+
+ lowerodp=tolower(odp);
+ if (lowerodp ~ /<frame[ \t]/) {
+ sub(/[sS][rR][cC]=[ \t]*/,"src=",odp);
+ match(odp,/src="[^"]+"/)
+ newurl=substr(odp,RSTART+5,RLENGTH-6)
if (DEBUG) print "Frame: " newurl
if (newurl !~ /\//) {
newurl=(urldir newurl)
if (DEBUG) print "Frame->: " newurl
}
retval=(retval " " get_links(newurl))
- } else if (tolower(odp) ~ /href=[ \t]*"[^"]*"/) {
+ } else if (lowerodp ~ /href=[ \t]*"[^"]*"/) {
sub(/[hH][rR][eE][fF]=[ \t]*"/,"href=\"",odp)
match(odp,/href="[^"]*"/)
link=substr(odp,RSTART,RLENGTH)
@@ -185,7 +187,7 @@
link=substr(link,7,length(link)-7)
retval=(retval " " link)
if (DEBUG) print "href(\"\"): " link
- } else if (tolower(odp) ~ /href=[ \t]*'[^']*'/) {
+ } else if (lowerodp ~ /href=[ \t]*'[^']*'/) {
sub(/[hH][rR][eE][fF]=[ \t]*'/,"href='",odp)
match(odp,/href='[^']*'/)
link=substr(odp,RSTART,RLENGTH)
@@ -193,7 +195,7 @@
link=substr(link,7,length(link)-7)
retval=(retval " " link)
if (DEBUG) print "href(''): " link
- } else if (tolower(odp) ~ /href=[ \t]*[^ \t>]*/) {
+ } else if (lowerodp ~ /href=[ \t]*[^ \t>]*/) {
sub(/[hH][rR][eE][fF]=[ \t]*/,"href=",odp)
match(odp,/href=[^ \t>]*/)
link=substr(odp,RSTART,RLENGTH)
@@ -202,8 +204,8 @@
retval=(retval " " link)
if (DEBUG) print "href(): " link
} else {
- retval=(retval " INTERNAL_ERROR")
- break
+ # <a ...> but not href - skip
+ if (DEBUG) print "skipping <a > without href: " odp
}
}
} else {
================================================================
---- CVS-web:
http://cvs.pld-linux.org/SPECS/pldnotify.awk?r1=1.52&r2=1.53&f=u
More information about the pld-cvs-commit
mailing list