#!/usr/bin/perl
use Mysql;
# MYSQL CONFIG VARIABLES
$host = "localhost:3306";
$database = "somedb";
$user = "someuser";
$pw = "somepw";
# PERL MYSQL CONNECT()
$connect = Mysql->connect($host, $database, $user, $pw);
# SELECT DB
$connect->selectdb($database);
# DEFINE A VARIABLES
$imtmquery = "SELECT machines.id, manufacturers.name, machines.model, machines.created FROM machines LEFT JOIN manufacturers ON machines.manufacturer_id = manufacturers.id";
@imtmstatics = ("blaa", "blubb");
$globusquery = "SELECT machines.id, manufacturers.name, machines.model, machines.created FROM machines LEFT JOIN manufacturers ON machines.manufacturer_id = manufacturers.id WHERE globushp = 1 OR globusspecial = 1 OR globusstart = 1 OR globusstartspecial = 1";
@globusstatics = ("blaa", "blubb");
$xmlheader = '<?xml version="1.0" encoding="UTF-8" ?><urlset xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
$xmlfooter = '</urlset>';
sub print_machine_link
{
return "<url>
<loc>http://www.".$_[0]."/".$_[5]."/showmach/".$_[1]."-".$_[2]."-".$_[3]."</loc>
<lastmod>".$_[4]."+00:00</lastmod>
<changefreq>monthly</changefreq>
</url>";
}
sub print_page_link
{
return "<url>
<loc>http://www.".$_[0]."/".$_[1]."/".$_[2]."</loc>
<lastmod></lastmod>
<changefreq>monthly</changefreq>
</url>";
}
open (IMTMDE, '>imtm_sitemap_de.xml');
open (IMTMEN, '>imtm_sitemap_en.xml');
open (IMTMZH, '>imtm_sitemap_zh.xml');
open (IMTMRU, '>imtm_sitemap_ru.xml');
print IMTMDE $xmlheader;
print IMTMEN $xmlheader;
print IMTMZH $xmlheader;
print IMTMRU $xmlheader;
# RENDER STATIC IMTM PAGES
for ($i=0;$i<@imtmstatics;$i++){
print IMTMDE print_page_link("imtm.com", "de", $imtmstatics[$i]);
print IMTMEN print_page_link("imtm.com", "en", $imtmstatics[$i]);
print IMTMZH print_page_link("imtm.com", "zh", $imtmstatics[$i]);
print IMTMRU print_page_link("imtm.com", "ru", $imtmstatics[$i]);
}
# EXECUTE IMTM QUERY
$execute = $connect->query($imtmquery);
while (@results = $execute->fetchrow()) {
$manufacturer = lc($results[1]);
$model = lc($results[2]);
$timestamp = $results[3];
$manufacturer =~ tr/A-Za-z_0-9/-/c;
$model =~ tr/A-Za-z_0-9/-/c;
$timestamp =~ tr/ /T/;
print IMTMDE print_machine_link("imtm.com", $results[0], $manufacturer, $model, $timestamp, "de");
print IMTMEN print_machine_link("imtm.com", $results[0], $manufacturer, $model, $timestamp, "en");
print IMTMZH print_machine_link("imtm.com", $results[0], $manufacturer, $model, $timestamp, "zh");
print IMTMRU print_machine_link("imtm.com", $results[0], $manufacturer, $model, $timestamp, "ru");
}
print IMTMDE $xmlfooter;
print IMTMEN $xmlfooter;
print IMTMZH $xmlfooter;
print IMTMRU $xmlfooter;
close (IMTMDE);
close (IMTMEN);
close (IMTMZH);
close (IMTMRU);
open (GLOBUSDE, '>globus_sitemap_de.xml');
open (GLOBUSEN, '>globus_sitemap_en.xml');
open (GLOBUSZH, '>globus_sitemap_zh.xml');
open (GLOBUSRU, '>globus_sitemap_ru.xml');
print GLOBUSDE $xmlheader;
print GLOBUSEN $xmlheader;
print GLOBUSZH $xmlheader;
print GLOBUSRU $xmlheader;
# RENDER STATIC Globus PAGES
for ($i=0;$i<@globusstatics;$i++){
print GLOBUSDE print_page_link("globus-trading.com", "de", $globusstatics[$i]);
print GLOBUSEN print_page_link("globus-trading.com", "en", $globusstatics[$i]);
print GLOBUSZH print_page_link("globus-trading.com", "zh", $globusstatics[$i]);
print GLOBUSRU print_page_link("globus-trading.com", "ru", $globusstatics[$i]);
}
# EXECUTE Globus QUERY
$execute = $connect->query($globusquery);
while (@results = $execute->fetchrow()) {
$manufacturer = lc($results[1]);
$model = lc($results[2]);
$manufacturer =~ tr/A-Za-z_0-9/-/c;
$model =~ tr/A-Za-z_0-9/-/c;
$timestamp = $results[3];
$timestamp =~ tr/ /T/;
print GLOBUSDE print_machine_link("globus-trading.com", $results[0], $manufacturer, $model, $timestamp, "de");
print GLOBUSEN print_machine_link("globus-trading.com", $results[0], $manufacturer, $model, $timestamp, "en");
print GLOBUSZH print_machine_link("globus-trading.com", $results[0], $manufacturer, $model, $timestamp, "zh");
print GLOBUSRU print_machine_link("globus-trading.com", $results[0], $manufacturer, $model, $timestamp, "ru");
}
print GLOBUSDE $xmlfooter;
print GLOBUSEN $xmlfooter;
print GLOBUSZH $xmlfooter;
print GLOBUSRU $xmlfooter;
close (GLOBUSDE);
close (GLOBUSEN);
close (GLOBUSZH);
close (GLOBUSRU);
system "gzip imtm_sitemap_de.xml";
system "gzip imtm_sitemap_en.xml";
system "gzip imtm_sitemap_zh.xml";
system "gzip imtm_sitemap_ru.xml";
system "gzip globus_sitemap_de.xml";
system "gzip globus_sitemap_en.xml";
system "gzip globus_sitemap_zh.xml";
system "gzip globus_sitemap_ru.xml";
Refactorings
No refactoring yet !
draegtun
March 15, 2010, March 15, 2010 10:34, permalink
Have a look at the Perl5 wiki on best practices: http://www.perlfoundation.org/perl5/index.cgi?perl_best_practices
The PBP book listed there is a good start. Make sure you also read the commentary listed because this book was written circa 2005. To keep up-to-date with even more modern practices then you need to keep an eye on Planet Iron Man (http://ironman.enlightenedperl.org/). In particular make sure you read Modern Perl Books blog (http://www.modernperlbooks.com/)
Run Perl::Critic over your code and it will provide advice on what to change to make it fall in line with best practices. For starters it will warn you on the following:
* use strict pragma
* use warning pragma
* use autodie pragma
* declare variables (my / our)
* three arg open with lexical variable (for eg. open my $fh, '>', 'output.txt';
Now there few ways you could refactor your code. And as speed seems to be important than this will affect how you refactor / optimise it. However you can write the Perl code "exactly" like your Ruby example. Here is a cut-down version to give you a leg-up:
/I3az/
use strict;
use warnings;
use 5.010;
use autodie;
use Builder;
use DateTime;
use DateTimeX::Easy;
use PerlIO::gzip;
sub render_page_sitemap {
my ($scope, $sitemapname, $statics, $machines, $lang) = @_;
open my $file, '>:gzip', "$sitemapname\_$lang.xml.gz";
my $builder = Builder->new( output => $file );
my $xml = $builder->block( 'Builder::XML', { indent => 4, newline => 1 } );
say {$file} '<?xml version="1.0" encoding="UTF-8" ?>';
$xml->urlset( urlset_attrib(), sub {
for my $page (@$statics) {
$xml->url( sub {
$xml->loc( url_page_link( lang => $lang, scope => $scope, page => $page ));
$xml->lastmod( DateTime->now->ymd );
$xml->changefreq( "daily" );
});
}
for my $mach (@$machines) {
$xml->url( sub {
$xml->loc( url_machine_link(
id => $mach->{id},
lang => $lang,
scope => $scope,
manufacturer => 1, # for eg
model => 1, # for eg
));
$xml->lastmod( DateTimeX::Easy->new( $mach->{created} )->strftime("%Y-%m-%dT%H:%M:%S+00:00") );
$xml->changefreq( "monthly" );
});
}
});
}
sub url_page_link {
my (%a) = @_;
my $host = host_bit( $a{scope} );
return qq!http://$host/$a{lang}/$a{page}!;
}
sub url_machine_link {
my (%a) = @_;
my $host = host_bit( $a{scope} );
return qq!http://$host/$a{lang}/showmach/$a{id}-$a{manufacturer}-$a{model}!;
}
sub host_bit {
return 'www.globus-trading.com' if $_[0] eq 'globus';
return "www.$_[0].com";
}
sub urlset_attrib {
return {
"xmlns:xsi" => 'http://www.w3.org/2001/XMLSchema-instance',
"xsi:schemaLocation" => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd",
"xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9"
};
}
# tasks
{
my @staticpages = qw/index impressum kontakt wir/;
my @machines = (
{ id => 'mach1', created => '2010-02-22 10:10:10' },
{ id => 'mach2', created => '2010-01-12 10:10:10' },
);
render_page_sitemap( 'imtm', 'imtm_sitemap', \@staticpages, \@machines, $_ )
for qw/de en zh ru/
}
{
my @staticpages = qw/
index
anfahrt
anfrage
downloads
impressum
kontakt
produktionslinien
wir
/;
my @machines = (
{ id => 'mach100', created => '2010-02-22 10:10:10' },
{ id => 'mach200', created => '2010-01-12 10:10:10' },
);
render_page_sitemap( 'globus', 'globus_sitemap', \@staticpages, \@machines, $_ )
for qw/de en zh ru/
}
I recently rewrote my Ruby on Rails based Sitemap generation Script to Perl to speed it up significantly
(before runtime ~ 6 minutes, after ~ 3 seconds)
Unfortunately I'm a total noob regarding perl :-(
So I guess my perl script is not very cleverly structured ;-).
Maybe any of you could help me a bit with improving my code?!?! I would be more then thankful to learn some "best practices" :-)
### Functional "Requirements"
- I've got two pages i want to generate sitemaps for (imtm.com / globus-trading.com). Both pages are using the same database, but a different subset of data
- both pages are availlable in 4 languages
- the generated sitemaps are quite large (> 30000 entries)
- I already got static "overall sitemaps" that link to the generated language dependent sitemaps
You can find my old, Rails based code at:
http://refactormycode.com/codes/1178-speeding-up-sitemap-generation