#!/usr/bin/perl 

use strict;
use warnings;

use URI::Escape;
use LWP::Simple;
use XML::Simple;

# Main loop: read "id=<digits> text=<html>" records, one per line, from the
# files named on the command line (or STDIN), reduce the HTML to plain text,
# ask the tagthe.net API for topic tags, and hand the result to set_tags().
# Malformed input is fatal; a failure while tagging one record is reported
# on STDERR and the loop moves on to the next record.

# Named entities that are decoded; every other named entity is dropped.
my %entity_char = (nbsp => ' ', lt => '<', gt => '>', amp => '&');

while (my $line = <>) {
  $line =~ /id=(\d+)\s+text=(.*)$/ or die "bad input $line";
  my ($id, $text) = ($1, $2);

  # A leading "<p><strong>Label</strong>: " is removed from the text and
  # its label is kept (lower-cased) as the first tag.
  my $firsttag;
  if ($text =~ s/^<p><strong>([^<]{2,50}?)<\/strong>: //) {
    $firsttag = lc $1;
  }

  # Strip markup.
  $text =~ s/<.*?>//gs;

  # Decode entities in a single pass so that text produced by one
  # replacement is never rescanned ("&amp;lt;" must end up as "&lt;").
  $text =~ s/&([a-z0-9A-Z]+);/exists $entity_char{$1} ? $entity_char{$1} : ''/ge;

  # The input carries literal backslash escapes: drop "\r", turn "\n" into
  # a space, then remove any backslash that is left.
  $text =~ s/\\r//g;
  $text =~ s/\\n/ /g;
  $text =~ s/\\//g;

  # limit to first 2k, cutting at the first space after that point
  $text =~ s/^(.{2000,}?) .*$/$1/;

  my $uri = "http://tagthe.net/api/?text=".uri_escape($text);

  eval {
    # LWP::Simple's get() returns undef when the request fails.
    my $doc = get $uri;
    defined $doc or die "no response from tagthe.net\n";

    my $xml = XMLin($doc);

    # XML::Simple folds a lone <dim> into a hash instead of a one-element
    # array, so accept both shapes.
    my $dims = $xml->{meme}->{dim};
    my @dims = ref($dims) eq 'ARRAY' ? @$dims
             : ref($dims) eq 'HASH'  ? ($dims)
             :                         ();

    # Keep the items of the (last) dimension whose type is "topic".
    my $tags;
    foreach my $dim (@dims) {
      next unless defined $dim->{type} && $dim->{type} eq 'topic';
      $tags = $dim->{item};
    }

    if (!defined $tags) {
      # Dump the response to STDERR: STDOUT carries the id=/t= records.
      use Data::Dumper;
      print STDERR Data::Dumper->Dump([$xml]);
      die "no tags found on $id";
    }

    # A lone <item> arrives as a plain string; set_tags() is given an
    # array reference in every case.
    $tags = [$tags] unless ref($tags) eq 'ARRAY';

    set_tags($id, $firsttag, $tags);
  };
  # Report the failure instead of discarding it, then carry on.
  warn "skipping id=$id: $@" if $@;

  # Pause between requests (presumably to go easy on the remote service).
  sleep 5;
}


# Print one record line for a tagged item: "id=<id> t=<tag> t=<tag> ...\n".
#
#   $id       - identifier echoed after "id="
#   $firsttag - tag to list first, or undef when there is none
#   $tags     - array ref of tag strings (a lone plain string is accepted)
#
# Tags are lower-cased and each distinct tag is printed once: $firsttag
# first, then the rest in the sort order of their original spelling.
# Undefined tags are skipped and the caller's array is not modified.
# Output goes to the currently selected handle, which is switched to
# autoflush.
sub set_tags {
  my ($id, $firsttag, $tags) = @_;

  # Copy the tags so lower-casing never writes back into the caller's
  # array (sort returns aliases to its input elements).
  my @rest = grep { defined $_ } (ref($tags) eq 'ARRAY' ? @$tags : ($tags));
  my @ordered = sort @rest;
  unshift @ordered, $firsttag if defined $firsttag;

  $| = 1;

  my %seen;
  my $record = "id=$id ";
  foreach my $tag (map { lc $_ } @ordered) {
    next if $seen{$tag}++;
    $record .= "t=$tag ";
  }
  print "$record\n";
}


