LinuxQuestions.org - Time quota based access control to specific websites

- Linux - Server (https://www.linuxquestions.org/questions/linux-server-73/)

- - Time quota based access control to specific websites (https://www.linuxquestions.org/questions/linux-server-73/time-quota-based-access-control-to-specific-websites-775913/)

Time quota based access control to specific websites

UPDATED: you can read my original post below

As I couldn't find any program that did what I wanted I set out to write my own.

My chosen language was C++ because this URL re-writer needs to be FAST (faster than Python or Perl anyway) and because C++ is a language I am familiar with.

First things first I chose my method of squid integration. Initially I was using the ACL (access control lists) and ensuring that every request pass an external ACL check to be allowed but I found this to be problematic, I’m not sure exactly why but I think it has something to do with the way squid optimises ACL performance. Using url_rewriter_program alleviated this issue.
As per the requirements from my original post I came up with the design below (in flow chart form)

http://www.edspcs.com.au/faceblock.png

The database to fulfill this design has three tables that I set up like this:

Code:

blacklist:

url(pk) //web address of banned website



request:

id (pk)

srcIP //src ip of web request

requestUrl //url of requested resource

requestTime //time of most recent request



status

flushDate (pk)





-- Restricted web addresses. The rewriter matches each requested URL against
-- these values with LIKE, so entries may contain SQL wildcards (% and _).
CREATE TABLE blacklist (
    url VARCHAR(250) NOT NULL,
    PRIMARY KEY (url)
);



-- One row per request made to a blacklisted address: the source IP, the
-- requested URL and the time of day of the request. The rewriter empties
-- this table on the first request of each new day.
CREATE TABLE request (
    id          MEDIUMINT    NOT NULL AUTO_INCREMENT PRIMARY KEY,
    srcIP       VARCHAR(15)  NOT NULL,
    requestUrl  VARCHAR(250) NOT NULL,
    requestTime TIME         NOT NULL
);





-- Holds the date on which the request table was last flushed. The rewriter
-- compares it with today's date to decide when to flush again.
CREATE TABLE status (
    flushDate DATE NOT NULL,
    PRIMARY KEY (flushDate)
);

And then the implementation was as follows

Code:

#include <time.h>

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <mysql++.h>

using namespace std;



//Database connection settings handed to mysqlpp::Connection::connect() in main().
//NOTE(review): the credentials are compiled into the binary (and published with
//the source); consider reading them from a protected configuration file instead.

//database account name
#define USER "root"

//database host
#define SERVER "localhost"

//password for USER
#define PASS "tstfu646"

//name of the database holding the blacklist, request and status tables
#define DB "eo"



//Returns, by value, the local time at which the current access period started.
//
//The day is split into three periods:
//        00:00 - 11:59  -> period starts at 00:00:00
//        12:00 - 13:59  -> period starts at 12:00:00
//        14:00 - 23:59  -> period starts at 14:00:00
//
//The date fields of the result are today's; only hour, minute and second are
//changed to the start of the period.
struct tm GetAccessPeriod()
{
        time_t rawtime;
        time ( &rawtime );

        //Work on a private copy: localtime() returns a pointer to a shared static
        //buffer, and editing that buffer in place would also change what any
        //other holder of the pointer sees.
        struct tm period = *localtime ( &rawtime );

        int startHour;
        if(period.tm_hour < 12)
        {
                //time is before 12pm
                startHour = 0;
        }
        else if(period.tm_hour < 14)
        {
                //time is between 12pm and 2pm (2pm itself belongs to the next period)
                startHour = 12;
        }
        else
        {
                //time is 2pm or later
                startHour = 14;
        }

        period.tm_sec = 0;
        period.tm_min = 0;
        period.tm_hour = startHour;
        return period;
}



//Returns the browsing quota, in seconds, for the access period that starts
//at *ap.
//
//ap is expected to be a period start as produced by GetAccessPeriod(), i.e.
//its tm_hour is 0, 12 or 14. The period starting at 12:00 gets 1800 seconds;
//the periods starting at 00:00 and 14:00 get 600 seconds each.
int GetQuotaSeconds(struct tm * ap)
{
        //The upper bound is exclusive: hour 14 is the start of the afternoon
        //period and must not inherit the lunchtime quota.
        if(ap->tm_hour >= 12 && ap->tm_hour < 14)
        {
                return 1800;
        }

        return 600;
}



//Formats the hour, minute and second of *time as "H:M:S" for use as an SQL
//TIME literal. The fields are written as plain integers (no zero padding).
//The string is returned by value.
string Stringify(struct tm * time)
{
        stringstream sqlTime;
        sqlTime << time->tm_hour;
        sqlTime << ":" << time->tm_min;
        sqlTime << ":" << time->tm_sec;
        return sqlTime.str();
}



// Splits str on any character contained in delimiters and appends each
// non-empty piece to tokens. Runs of consecutive delimiters are treated as a
// single separator, and tokens is appended to, never cleared.
void Tokenise(const string& str, vector<string>& tokens, const string& delimiters = " ")
{
    // Start of the first token (npos when str holds nothing but delimiters).
    string::size_type tokenStart = str.find_first_not_of(delimiters);

    while (tokenStart != string::npos)
    {
        // One past the last character of the current token.
        const string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);

        if (tokenEnd == string::npos)
        {
            // Last token: it runs to the end of the string.
            tokens.push_back(str.substr(tokenStart));
            break;
        }

        tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));

        // Jump over the delimiter run to the start of the next token.
        tokenStart = str.find_first_not_of(delimiters, tokenEnd);
    }
}



int main()

{



        //local variables:

        string url, ip;

        time_t rawtime; //time/date in seconds since 1970

        mysqlpp::Date lastDate; //use to store the flush date queried from db

        mysqlpp::Date date;        //used to store current date in form to be inserted

        struct tm ap; //start of access period

        struct tm * curTime; //the current time

        mysqlpp::StoreQueryResult res; //stores query results

        mysqlpp::Time::Time t;

        mysqlpp::Time firstRequest; //time of first request

        int ct, fr; //seconds since 00:00 for first request and current time

        char buff [2048];

        vector<string> tokens;

        

        time ( &rawtime );



        ofstream logfile;

        logfile.open("/usr/local/bin/log.txt");

        

        // Connect to db

        mysqlpp::Connection conn;

        if (conn.connect(DB, SERVER, USER, PASS)) 

        {

                mysqlpp::Query query = conn.query("");

                

                //this loop should not exit and should be blocked by cin waiting on input from squid

                while(true)

                {

                        // logfile << "started loop at time: " << rawtime <<endl;

                

                        cin.getline(buff, 2048);

                        Tokenise(string(buff), tokens);

                        if(tokens.size() > 1)

                        {

                                url = tokens[0];

                                ip = tokens[1];

                        }

                        tokens.clear();

                        

                        //get current date:

                        time ( &rawtime );

                        date = mysqlpp::Date(rawtime);

                        curTime = localtime ( &rawtime );



                        // logfile << "request for: " << url << " from: " << ip  << " at: " << Stringify(curTime) <<endl;



                        //get last date from DB

                        query << "SELECT flushDate FROM status";

                                        

                        if (res = query.store()) 

                        {

                                lastDate = mysqlpp::Date(res[0][0]);

                        }

                        query.reset();

                        

                        if(date.compare(lastDate) > 0) //if it's a new day

                        {

                                // logfile << "cur date after last date" <<endl;

                                

                                //flush request

                                try

                                {

                                query << "DELETE FROM request";

                                query.execute();

                                }

                                catch(mysqlpp::BadQuery e)

                                {

                                        // logfile << e.what() <<endl;

                                }

                                query.reset();

                                

                                //update date to today

                                query << "UPDATE status SET flushDate = '" + date.str() +"'";

                                query.execute();

                                query.reset();                        

                        }

                        else

                        {

                                // logfile << "same day" <<endl;

                                query << "select url from blacklist where '" + url + "' LIKE url";

                                res = query.store();        

                                query.reset();

                                if(res.num_rows() > 0)

                                {

                                        //quota code goes here:

                                        // logfile <<  res.num_rows() << " items in blacklist matching " << url <<endl;

                                        t = mysqlpp::Time::Time(rawtime);

                                        query << "insert into request (srcIP, requestUrl, requestTime) values('" + ip + "', '" + url + "', '" + t.str() + "')";

                                        query.execute();

                                        query.reset();

                                        ap = GetAccessPeriod();

                                        curTime = localtime ( &rawtime );



                                        

                                        query << "select min(requestTime) as time from request where requestTime > '" + Stringify(&ap) + "' AND srcIP = '" + ip + "'";

                                        res = query.store();        

                                        // logfile << res.num_rows() << " requests from: " <<ip <<endl;

                                        firstRequest = mysqlpp::Time(res[0][0]);

                                        // logfile << "first request was on: " << firstRequest.str() <<endl;

                                        fr = firstRequest.second() + 60*firstRequest.minute() + 3600*firstRequest.hour();

                                        ct = curTime->tm_sec + 60*curTime->tm_min + 3600*curTime->tm_hour;

                                        fr += GetQuotaSeconds(&ap);

                                        //cout << "fr: " << fr << " ct: " << ct <<endl;

                                        // logfile <<ip << " " <<url << " first request plus quota is: " << fr << " the time of current request is: " << ct  << " time difference : " << fr-ct <<endl;

                                        if(fr > ct)

                                        {

                                                cout << buff << endl;

                                                // logfile << "banned, but inside quota, OK" <<endl;

                                        }

                                        else

                                        {

                                                cout << "302:http://192.168.1.201/denied.gif" << endl;

                                                // logfile << "banned, outside quota, ERR" <<endl;

                                        }

                                }

                                else

                                {

                                        cout << buff << endl;

                                        // logfile << "not banned, OK" <<endl;

                                }

                                query.reset();

                        }



                        if(url == "exit")

                        {

                                break;

                        }        

                }

                logfile.close();

                

        }

        else

        {

                // logfile << "connection fail" <<endl;

        }

        return 0;

}

So if I have implemented a solution what is my question?
Well it could be that I am at the wrong place for this kind of help but
A:

As I said, this code needs to be fast; could I get some feedback on my design? One thing of concern to me is that at the moment all requests to a banned site are logged. This could be a LOT of requests, and while this table is emptied every day it could still get way too big. Maybe a solution that used update instead of insert and just updated the request time?

B:

Code quality, have I made any mistakes with memory management etc.

C: [most important]

How could I share this code with other people. I only wrote this because I couldn’t find free software which did it for me. What is the best way to get this into a form that other net admins can use? Ill need a lot of help with this I think as I have never used autoconfig before or anything like that.
-----------------------------------------------------

ORIGINAL POST:

I'm the network administrator of a fairly small network (<100 stations) and my work is generally just doing tech support for the staff here and managing services like samba mail etc. My boss recently approached me with a more difficult task however, she would like to limit the amount of time each staff member can access certain websites, allocating them a quota for example:

Bob can only view facebook for 10 minutes before 12:00, between 12:00 and 13:00 he gets another 30 minutes of quota, and then after 13:00 he gets another 10 minutes of quota.

Ideally this quota would be consumed only when a staff member was viewing a page but I don't think this is entirely possibly, perhaps the quota would simply start counting down once the page is first accessed?

This system must be implemented for $0 using entirely free software and hardware we have sitting around the office.

Now I understand how to implement more basic filtering I could set up a transparent proxy using squid, which already supports user authentication, and then use something like Dansguardian for URL blacklisting. My questions is: is there free software which can handle this kind of time quota based control?

If there is not what would be the best DIY approach. I was thinking I could write a simple squid log parser of my own (in C or python or whatever) but this may be more work than is necessary. Is there perhaps a generic squid parsing tool which supports scripting that could do some of the work for me?

I could be totally off the mark and perhaps a proxy is not the best approach at all? I could add special logging rules to iptables and go from there but it seems to me that would be a lot more work.

Thanks in advance, capo.

Well, given that web pages are downloaded to your wkstn browser, not streamed live (although some content may be), I think(?) maybe you'd just have to count num of accesses, rather than amt of 'time' spent reading a page (can't see how you'd do that).
A simpler soln would be to have access time-bands eg only access non-work sites eg gmail between 1200-1300.
Of course, I could be completely wrong.. ;)

Quote:

Originally Posted by chrism01 (Post 3792985)

That's what I meant when I said it would probably be impossible to time how long someone was viewing a page, but you could check when someone first accessed a page and count down from that time. I suggested to my boss having time bands of access but she said that she would like both time bands and time quotas if possible.

Would it help if you explained (in a simple manner) how webpages work ie they are just simple downloads, so time quotas doesn't really make sense eg if someone surfs to a new page every 60 secs, when would the countdown start/stop??
Would it (time out) renew for each page??
You'd also need a 'global' reset for each user, otherwise they could download 1 page at 9am, which would timeout, then they'd be locked out of the web for the rest of the day.
Could get very messy.

Quote:

Originally Posted by chrism01 (Post 3793104)

The time quota is not for browsing, just for accessing specific pages ie. You get 3 access a day, after you request a page for the first time that day a countdown starts and you get unlimited requests to that domain until the countdown ends, at which point all request to that domain are denied until a certain time (say 12:00) at which point you get another 10 minutes of requests, starting from the first request.

Quote:

3 access a day, after you request a page for the first time that day a countdown starts and you get unlimited requests to that domain until the countdown ends

so 3 domains per day then, not 3 pages?

How does the '10 mins' fit in with that method?
Note the 'unlimited requests' in the quote'

Quote:

Originally Posted by chrism01 (Post 3793285)

so 3 domains per day then, not 3 pages?

How does the '10 mins' fit in with that method?
Note the 'unlimited requests' in the quote'

no they can access all pages EXCEPT those on specific list limitlessly.

For those on the list they get say 3 periods of access per day. These periods of access start from the first request to that domain and they are allowed unlimited requests until their time is up.

so if I access facebook.com at 11:05 I can then browse facebook until 11:15 at which point requests will be denied until 12:00 when I am allotted a new period of access, but it does not start at 12:00 it starts when I first make a request to facebook.com after 12:00 and then ends 10 minutes after that first request.

OK, that's a bit clearer.
What happens to time overlaps eg taking your example above, suppose the user logs into facebook at 11:58. How does that affect the 'after 12:00' rule? Would you have a flag that can tell when someone's 'first' access is and then allows them to 'access' facebook from 11:59-12:09, then immediately start (aka continue) a new 10 mins 12:10-12:20 ?
Just trying to clarify here.
What happens if they don't login to facebook at all until eg 4:30pm: do they get 3 x 10 mins periods or only one?
It's the corner cases that make programming interesting ;)

Quote:

Originally Posted by chrism01 (Post 3794509)

these boundary cases are interesting but it's not really my question, I don't need programming help, i'm just looking for advice on what tools i can use to minimize the amount of programming ill have to do, ie. is there already a tool that can do this?

I admit I've never tried to do this. I'm kind of surprised no-one else has jumped in by now.
Have you had a look at things like netnanny, websense etc?
Theoretically, for a DIY approach, Squid / dansguardian and some Perl is where I'd start looking.
I'll be interested to know the answer myself.

Ok so i started working a solution using squid and python but i'm having some issues wiring up a simple python script to a squid ACL. My python script is:

Code:

import sys

# Squid keeps this helper running and sends it one request per line, waiting
# for one reply line each time. Answer every line as soon as it arrives:
# readlines() would block until EOF, which never comes while squid is running.
while True:
        line = sys.stdin.readline()

        if not line:
                # EOF: the other end closed the pipe.
                break

        if line.strip() == "exit":
                break

        # Flush explicitly; stdout is block-buffered when it is a pipe, so the
        # reply would otherwise sit in the buffer while squid waits for it.
        sys.stdout.write("OK\n")
        sys.stdout.flush()

so it's just an infinite loop that gobbles up text from stdin and returns OK every time it gets something, the idea being that squid will be piping over data and the script just returns OK over and over.

My relevant squid config looks like:

Code:



external_acl_type faceblock children=5 %SRC /usr/bin/python /usr/local/bin/faceblock.py



acl eostaff external faceblock



http_access allow localnet eostaff

but if i request a page it just sits there loading for ever, it would appear that the python script never returns and hangs squid.

With my debug mode set to:

Code:

debug_options ALL,1 33,2 28,9

the first request after restarting squid appears in cache.log:

Code:

2009/12/23 13:09:22| aclCheckFast: list: 0x7fc3db15abb8

2009/12/23 13:09:22| aclMatchAclList: checking all

2009/12/23 13:09:22| aclMatchAcl: checking 'acl all src all'

2009/12/23 13:09:22| aclMatchIp: '192.168.1.188' found

2009/12/23 13:09:22| aclMatchAclList: returning 1

2009/12/23 13:09:22| aclCheck: checking 'http_access allow manager localhost'

2009/12/23 13:09:22| aclMatchAclList: checking manager

2009/12/23 13:09:22| aclMatchAcl: checking 'acl manager proto cache_object'

2009/12/23 13:09:22| aclMatchAclList: no match, returning 0

2009/12/23 13:09:22| aclCheck: checking 'http_access deny manager'

2009/12/23 13:09:22| aclMatchAclList: checking manager

2009/12/23 13:09:22| aclMatchAcl: checking 'acl manager proto cache_object'

2009/12/23 13:09:22| aclMatchAclList: no match, returning 0

2009/12/23 13:09:22| aclCheck: checking 'http_access allow purge localhost'

2009/12/23 13:09:22| aclMatchAclList: checking purge

2009/12/23 13:09:22| aclMatchAcl: checking 'acl purge method PURGE'

2009/12/23 13:09:22| aclMatchAclList: no match, returning 0

2009/12/23 13:09:22| aclCheck: checking 'http_access deny purge'

2009/12/23 13:09:22| aclMatchAclList: checking purge

2009/12/23 13:09:22| aclMatchAcl: checking 'acl purge method PURGE'

2009/12/23 13:09:22| aclMatchAclList: no match, returning 0

2009/12/23 13:09:22| aclCheck: checking 'http_access deny !Safe_ports'

2009/12/23 13:09:22| aclMatchAclList: checking !Safe_ports

2009/12/23 13:09:22| aclMatchAcl: checking 'acl Safe_ports port 80                # http'

2009/12/23 13:09:22| aclMatchAclList: no match, returning 0

2009/12/23 13:09:22| aclCheck: checking 'http_access deny CONNECT !SSL_ports'

2009/12/23 13:09:22| aclMatchAclList: checking CONNECT

2009/12/23 13:09:22| aclMatchAcl: checking 'acl CONNECT method CONNECT'

2009/12/23 13:09:22| aclMatchAclList: no match, returning 0

2009/12/23 13:09:22| aclCheck: checking 'http_access allow localnet eostaff'

2009/12/23 13:09:22| aclMatchAclList: checking localnet

2009/12/23 13:09:22| aclMatchAcl: checking 'acl localnet src 192.168.1.0/24        # RFC1918 possible internal network'

2009/12/23 13:09:22| aclMatchIp: '192.168.1.188' found

2009/12/23 13:09:22| aclMatchAclList: checking eostaff

2009/12/23 13:09:22| aclMatchAcl: checking 'acl eostaff external faceblock'

2009/12/23 13:09:22| aclMatchAclList: no match, returning 0

after that subsequent requests do not appear in the log file.

Any ideas?

updated original post considerably.