Subversion Repositories DevTools

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6914 dpurdie 1
package com.erggroup.buildtool.daemon;
2
 
3
import com.erggroup.buildtool.ripple.Package;
4
import com.erggroup.buildtool.ripple.ReleaseConfig;
5
import com.erggroup.buildtool.ripple.ReleaseManager;
6
import com.erggroup.buildtool.utilities.utilities;
7
import com.erggroup.buildtool.daemon.NagiosThread;
8
 
9
import java.io.File;
10
import java.io.IOException;
11
import java.net.ServerSocket;
12
import java.sql.SQLException;
13
import java.util.ArrayList;
14
import java.util.Iterator;
15
import java.util.Map;
16
import java.util.concurrent.ConcurrentLinkedQueue;
17
 
18
import org.apache.log4j.Logger;
19
import org.apache.log4j.xml.DOMConfigurator;
20
 
21
/**BuildDaemon sub component and entry point (main BuildDaemon thread).
 */
23
public class BuildDaemon
24
{
25
 
26
    /**hostname
27
     * @attribute
28
     */
29
    static String mHostname;
30
 
31
    /**GBE_LOG
32
     * @attribute
33
     */
34
    static String mGbeLog;
35
 
36
    /** mShutDown
37
	 * @attribute
38
	 *  Request to shutdown the build system gracefully
39
	 */
40
	static boolean mShutDown = false;
41
 
42
    /**Logger
43
     * @attribute
44
     */
45
    private static final Logger mLogger = Logger.getLogger(BuildDaemon.class);
46
 
47
    /**Collection of ThreadIdentifier objects.
48
     * Using a ConcurrentLinkedQueue because we add and remove items from the collection
49
     * and it is being accessed from multiple threads.
50
     * @attribute
51
     */
52
    private ConcurrentLinkedQueue<ThreadIdentifier> mThreadCollection = new ConcurrentLinkedQueue<ThreadIdentifier>();
53
 
54
    /** Last time we did a poll for new builds
55
     * 
56
     */
57
    private long mLastBuildPoll = 0;
58
 
59
    /** Build Process poll time in seconds
60
     *  Modified on error to slow the poll rate
61
     */
62
    private int mPollTime = 3;
63
 
64
    /**Nagios
65
     * @attribute
66
     */
67
    ServerSocket nagiosSrv;
68
    NagiosThread nagiosChecker;
69
 
70
    /** Local class to assist in reporting Nagios Status
71
     * 
72
     */
73
    public static class NagiosInfo {
74
        int        threadCount = 0;
75
        int        threadAliveCount = 0;
76
        int        masterCount = 0;
77
        int        slaveCount = 0;
78
        ArrayList<String> extendedReason  = new ArrayList<String>();
79
    }
80
 
81
    /**mThreadCollection items
82
     */
83
    private class ThreadIdentifier
84
    {
85
        /**rcon_id associated with the thread
86
         * @attribute
87
         */
88
        private final int mRconId;
89
 
90
        /**thread identifier
91
         * @attribute
92
         */
93
        private final BuildThread mThread;
94
 
95
        /**constructor
96
         */
97
        ThreadIdentifier(int rconId, BuildThread thread)
98
        {
99
            mLogger.debug("ThreadIdentifier " + rconId);
100
            mRconId = rconId;
101
            mThread = thread;
102
        }
103
 
104
        /**accessor
105
         */
106
        int getRconId()
107
        {
108
            mLogger.info("get_rcon_id returned " + mRconId);
109
            return mRconId;
110
        }
111
 
112
        /**accessor
113
         */
114
        BuildThread getThread()
115
        {
116
            mLogger.debug("get_thread");
117
            return mThread;
118
        }
119
    }
120
 
121
    /**Exception thrown to indicate an uncorrectable error
122
     */
123
    public class BuildException extends Exception
124
    {
125
        BuildException(String msg)
126
        {
127
            mLogger.fatal(msg);
128
        }
129
 
130
      private static final long serialVersionUID = 1L;
131
    }
132
 
133
    /**main method for the Build Daemon program
134
     * instantiates a BuildDaemon object
135
     */
136
    public static void main(String[] args)
137
    {
138
        String abtdXml = utilities.catDir (System.getenv("ABT_HOME"), "abtd.xml");
139
        DOMConfigurator.configure(abtdXml);
140
        mLogger.debug("main");
141
 
142
        mHostname = testEnvVar("GBE_HOSTNAME");
143
        testEnvVar("ANT_HOME");
144
        testEnvVar("GBE_UNC");
145
        testEnvVar("GBE_DPKG_SSH_PROPERTIES");
146
        mGbeLog = testEnvVar("GBE_LOG");
147
 
148
        File gl = new File( mGbeLog );
149
        if ( !gl.isDirectory() )
150
        {
151
            mLogger.fatal("main GBE_LOG is not a directory");
152
            System.exit(1);
153
        }
154
 
155
        // Connection information for the database
156
        String connectionString = System.getenv("GBE_RM_LOCATION");
157
        String username = System.getenv("GBE_RM_USERNAME");
158
        String password = System.getenv("GBE_RM_PASSWORD");
159
        boolean showHelp = false;
160
        int consumed = 0;
161
 
162
        for (int optind = 0; optind < args.length; optind += 1 + consumed)
163
        {
164
            consumed = 0;
165
            boolean argsRemains = optind < (args.length - 1);
166
            if (args[optind].equals("-c") && argsRemains )
167
            {
168
                connectionString = args[optind+1];
169
                consumed ++;
170
            }
171
            else if (args[optind].equals("-u") && argsRemains)
172
            {
173
                username = args[optind+1];
174
                consumed ++;
175
            }
176
            else if (args[optind].equals("-p") && argsRemains)
177
            {
178
                password = args[optind+1];
179
                consumed ++;
180
            }
181
            else
182
            {
183
                showHelp = true;
184
            }
185
        }
186
 
187
        if (    showHelp ||
188
                connectionString == null ||
189
                connectionString.length() == 0 ||
190
                username == null ||
191
                username.length() == 0 ||
192
                password == null ||
193
                password.length() == 0)
194
        {
195
            mLogger.fatal("Usage: java -jar abtdD.jar -c connectionString -u username -p password");
196
            System.exit(1);
197
        }
198
 
199
        BuildDaemon buildDaemon = new BuildDaemon(connectionString, username, password);
200
        buildDaemon.cleanUp();
201
 
202
    }
203
 
204
    /**
205
     * Test a named EnvVar to ensure that it has been set
206
     * Call after the mLogger has been setup
207
     *  @param varName - Name of string to examine
208
     *  @return - String value of the parameter
209
     */
210
    private static String testEnvVar(String varName) {
211
        String envVar = System.getenv(varName);
212
 
213
        if ( envVar == null )
214
        {
215
            mLogger.fatal("main "+ varName +" not set");
216
            System.exit(1);
217
        }
218
        return envVar;
219
    }
220
 
221
    /**constructor, implements the sequence diagram spawn thread.
     * Builds a ReleaseManager from the connection details, with
     * "[release_manager]" appended to the user name, and hands over to the
     * ReleaseManager based constructor.
     *
     * @param connectionString database connection string
     * @param username         database user name
     * @param password         database password
     */
    public BuildDaemon(String connectionString, String username, String password)
    {
        this(
            new ReleaseManager(
                connectionString,
                username + "[release_manager]",
                password));
    }
227
 
228
    public BuildDaemon(ReleaseManager releaseManager)
229
    {
230
        String utf = null;
231
        mLogger.warn("BuildDaemon");
232
 
233
        try
234
        {
235
            // Flag UTF in progress
236
            if ( releaseManager.mConnectionString.compareTo("unit test spawn thread") == 0 )
237
            {
238
                utf = releaseManager.mConnectionString;
239
            }
240
 
241
            if ( Package.mGenericMachtype == null )
242
            {
243
                throw new BuildException("run GBE_MACHTYPE not set");
244
            }
245
 
246
            if ( Package.mGbeDpkg == null )
247
            {
248
                throw new BuildException("run GBE_DPKG not set");
249
            }
250
 
251
            //	Set the default handler invoked when a thread abruptly terminates due to an
252
            //	uncaught exception, and no other handler has been defined for that thread.
253
            //
254
            Thread.setDefaultUncaughtExceptionHandler(new BuildDaemonUncaughtExceptionHandler());
255
 
256
            //
257
            //  Start the Nagios Interface Thread
258
            //    Unless performing a unit test
259
            //
260
            if ( utf == null )
261
            {
262
                startNagiosServer();
263
            }
264
 
265
            //	Discover new build daemons to be started on the current host
266
            //
267
 
268
            while (!mShutDown)
269
            {
270
                mPollTime = 3;
271
 
272
                // Poll for slave build requests
273
                lookForBuildRequests(releaseManager, utf);
274
 
275
                // Poll for new builds every few (5) minutes
276
                if ( System.currentTimeMillis() - mLastBuildPoll > 5 * 60 * 1000) {
277
                    lookForNewBuildDaemons(releaseManager, utf);
278
                    mLastBuildPoll = System.currentTimeMillis();
279
                }
280
 
281
                //  In UTF mode we only execute the loop once
282
                if ( utf != null ) {
283
                    break;
284
                }
285
 
286
                // Wait 3 seconds before polling again
287
                daemonSleepSecs(mPollTime);
288
            }
289
 
290
            mLogger.fatal("BuildDaemon daemon spawning shutdown");
291
            if (mShutDown)
292
            {
293
                notifyAllBuildThreads();
294
                cleanUp();
295
            }
296
        }
297
        catch( BuildException e )
298
        {
299
            mLogger.fatal("BuildDaemon caught Exception");
300
        }
301
    }
302
 
303
    /**
304
     * Used by the Polling Thread to notify slaves of an active build request
305
     * The slaves will 'wait' for notification in order to proceed
306
     * Thus the polling is done once, rather than each slave thread polling individually
307
     * @param rconId
308
     */
309
    public void notifyBuildThread(Integer rconId) {
310
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
311
        {
312
            ThreadIdentifier threadIdentifier = it.next();
313
 
314
            if (threadIdentifier.mThread.isAlive())
315
            {
316
                BuildThread bt = threadIdentifier.mThread;
317
                if (bt.mRconId == rconId ) {
318
                    mLogger.warn("Notify RtagId:"+ bt.mRtagId +", bmlId:" + bt.mRconId);
319
                    synchronized (bt.mActiveBuildMonitor) {
320
                        bt.mActiveBuildMonitor.notifyAll();
321
                    }
322
                }
323
            }
324
        }
325
    }
326
 
327
    /**
328
     * Notify all build threads of some activity
329
     * This will release all threads waiting on the ActiveBuildMonitor
330
     */
331
    public void notifyAllBuildThreads() {
332
        mLogger.fatal("notifyAllBuildThreads");
333
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
334
        {
335
            ThreadIdentifier threadIdentifier = it.next();
336
 
337
            if (threadIdentifier.mThread.isAlive())
338
            {
339
                BuildThread bt = threadIdentifier.mThread;
340
 
341
                synchronized (bt.mActiveBuildMonitor) {
342
                    bt.mActiveBuildMonitor.notifyAll();
343
                }
344
            }
345
        }
346
    }
347
 
348
    /** Look for changes in the build daemons required
349
     *  Startup new daemons
350
     *  Shutdown ones that are no longer required
351
     * 
352
     *  @param  releaseManager - The release Manager instance
353
     *  @param  utf            - Unit Test information 
354
     */
355
    private void lookForNewBuildDaemons (ReleaseManager releaseManager, String utf) {
356
        mLogger.fatal("lookForNewBuildDaemons" );
357
        try
358
        {
359
            // Create a list of all machines that are configured to run on this machine
360
            //
361
            releaseManager.queryReleaseConfig(mHostname);
362
 
363
            //
364
            //  Iterate over all the configured machines
365
            //  Start up new build threads for new machines
366
            //
367
            for (Iterator<ReleaseConfig> it = releaseManager.mReleaseConfigCollection.mReleaseConfig.iterator(); it.hasNext(); )
368
            {
369
                ReleaseConfig rc = it.next();
370
 
371
                if (!isActive(rc.getRconId()))
372
                {
373
                    //
374
                    //  Ensure the Release is configured with exactly one master
375
                    //
376
                    int masterCount = releaseManager.queryMasterCount( rc.getRtagId() );
377
                    if ( masterCount != 1 )
378
                    {
379
                        mLogger.fatal("BuildDaemon activating. Invalid Masters " + rc.getRtagId() + " MasterCount: " +  masterCount );
380
                        continue;
381
                    }
382
 
383
                    mLogger.warn("BuildDaemon activating " + rc.getRtagId() + " " + rc.getRconId() + " " + rc.getDaemonMode());
384
 
385
                    //
386
                    //    Clone the BuildDaemons ReleaseManager thread
387
                    //    Done so that we can perform unit testing by extending the ReleaseManager class
388
                    //    Need to be able to have the Master/Slave threads process through the overridden
389
                    //    methods within the extended ReleaseManager class
390
                    //
391
                    ReleaseManager threadReleaseManager = (ReleaseManager) releaseManager.clone();
392
 
393
                    // spawn and run the BuildThread
394
                    if (rc.getDaemonMode() == 'M')
395
                    {
396
                        MasterThread thread = new MasterThread(rc.getRtagId(), rc.getRconId(), threadReleaseManager, utf);
397
                        ThreadIdentifier threadIdentifier =  new ThreadIdentifier(rc.getRconId(), thread);
398
                        mThreadCollection.add(threadIdentifier);
399
                        thread.start();
400
                    }
401
                    else if (rc.getDaemonMode() == 'S')
402
                    {
403
                        SlaveThread thread = new SlaveThread(rc.getRtagId(), rc.getRconId(), threadReleaseManager, utf);
404
                        ThreadIdentifier threadIdentifier = new ThreadIdentifier(rc.getRconId(), thread);
405
                        mThreadCollection.add(threadIdentifier);
406
                        thread.start();
407
                    }
408
                }
409
            }
410
 
411
            //
412
            //  Clean out terminated threads from the thread collection
413
            //
414
            cleanupTerminatedThreads();
415
 
416
        }
417
        catch (SQLException e)
418
        {
419
            mLogger.warn("BuildDaemon caught SQLException");
420
        }
421
        catch (InterruptedException e)
422
        {
423
            mLogger.warn("BuildDaemon caught InterruptedException");
424
            Thread.currentThread().interrupt();
425
        }
426
        catch (Exception e)
427
        {
428
            mLogger.warn("BuildDaemon caught Exception");
429
        }
430
    }
431
 
432
    /** Look for new build requests for slaves on this machine
433
     *  It is better to have one thread poll every 3 seconds, than have all the slave
434
     *  threads poll every 3 seconds 
435
     * 
436
     * @param releaseManager
437
     * @param utf
438
     */
439
 
440
    private void lookForBuildRequests(ReleaseManager releaseManager, String utf) {
441
        if (utf != null) {
442
            return;
443
        }
444
 
445
        mLogger.debug("Poll Cycle");
446
        try {
447
 
448
            //  Generate a list of all rconIds that are currently active on this machine
449
            //  Will be used to limit the query
450
            //
451
            StringBuilder rconIdList = new StringBuilder();
452
            String joiner = null;
453
 
454
            for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
455
            {
456
                ThreadIdentifier threadIdentifier = it.next();
457
 
458
                if (threadIdentifier.mThread.isAlive())
459
                {
460
                    BuildThread bt = threadIdentifier.mThread;
461
                    if( joiner != null) {
462
                        rconIdList.append(joiner);
463
                    }
464
                    rconIdList.append(bt.mRconId);
465
                    joiner = ",";
466
                }
467
            }
468
 
469
            if (joiner != null) {
470
                //
471
                //  Locate slaves that have an outstanding build request and master that have forced poll requests
472
                //  Notify the threads of this condition
473
                ArrayList<Integer> rv = releaseManager.queryActivatedBuilds(mHostname, rconIdList.toString());
474
                for (Integer rconId : rv) {
475
                    mLogger.warn("Activate: " + rconId);
476
                    notifyBuildThread(rconId);
477
                }
478
            }
479
 
480
        } catch (Exception e) {
481
            mLogger.fatal("lookForBuildRequests Exception:" + e.getMessage());
482
            mPollTime = 30;
483
        }
484
 
485
    }
486
 
487
    /**
488
     * Start up the Nagios server
489
     * @throws BuildException
490
     */
491
    private void startNagiosServer() throws BuildException {
492
        try {
493
            nagiosSrv = new ServerSocket(1111);
494
            nagiosChecker = new NagiosThread(nagiosSrv, this);
495
            nagiosChecker.start();
496
        } catch ( IOException e ) {
497
            throw new BuildException("Nagios port in use");
498
        }
499
    }
500
 
501
    /**
502
     * Clean out terminated threads from the thread collection
503
     * Examine all the build threads and get rid of those that have been terminated
504
     */
505
    private void cleanupTerminatedThreads()
506
    {
507
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
508
        {
509
            ThreadIdentifier threadIdentifier = it.next();
510
 
511
            if (!threadIdentifier.getThread().isAlive())
512
            {
513
                BuildThread bt = threadIdentifier.getThread();
514
                mLogger.warn("BuildDaemon removal " + bt.mRtagId + " " + bt.mRconId + " " + bt.getMode());
515
                it.remove();
516
            }
517
        }
518
    }
519
 
520
    /**calls isAlive on the Thread object associated with the rcon_id
521
     */
522
    public boolean isActive(final int rcon_id)
523
    {
524
        mLogger.debug("isActive " + rcon_id);
525
        boolean retVal = false;
526
        boolean found = false;
527
 
528
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
529
        {
530
            ThreadIdentifier threadIdentifier = it.next();
531
 
532
            if (threadIdentifier.getRconId() == rcon_id)
533
            {
534
                found = true;
535
                if (threadIdentifier.getThread().isAlive())
536
                {
537
                    retVal = true;
538
                    break;
539
                }
540
                else
541
                {
542
                    mLogger.warn("isActive found dead thread " + rcon_id );
543
                }
544
            }
545
        }
546
 
547
        if ( !found )
548
        {
549
            mLogger.warn("isActive thread not found " + rcon_id);
550
        }
551
 
552
        mLogger.debug("isActive returned " + retVal);
553
        return retVal;
554
    }
555
 
556
    /**
557
     *  Nagios interface
558
     *          Must have one active thread
559
     *          Examine all threads - for logging
560
     */
561
    void checkThreads( NagiosInfo nagInfo)
562
    {
563
        mLogger.info("checkThreads called");
564
        if (!mThreadCollection.isEmpty())
565
        {
566
            for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext();) 
567
            {
568
                ThreadIdentifier threadIdentifier = it.next();
569
 
570
                nagInfo.threadCount ++;
571
                if ( threadIdentifier.getThread().isAlive()) {
572
                    nagInfo.threadAliveCount ++;
573
                }
574
 
575
                threadIdentifier.getThread().checkThread(nagInfo);
576
            }
577
        }
578
        else
579
        {
580
            nagInfo.extendedReason.add("No Build Threads configured");
581
        }
582
    }
583
 
584
    /**
585
     *  Nagios interface
586
     *      Provide extended information on all threads
587
     * @param estatus 
588
     */
589
    public void extendedStatus(Map<String, Object> estatus)
590
    {
591
        NagiosInfo nagInfo = new NagiosInfo();
592
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext();) 
593
        {
594
            ThreadIdentifier threadIdentifier = it.next();
595
            threadIdentifier.getThread().extendedStatus(nagInfo, estatus);
596
        }
597
    }
598
 
599
 
600
    /**
601
     * daemonSleepSecs
602
     * Sleep for a specified number of seconds allowing for
603
     * a termination request
604
     * 
605
     * The function will sleep for 5 seconds at a time, 
606
     * then check for a termination request
607
     * 
608
     * @param sleepSecs The number of seconds to sleep
609
     * 
610
     * @return True :    Termination requested
611
     *         
612
     *         False:    No Termination requested
613
     */
614
    public static boolean daemonSleepSecs(int sleepSecs)
615
    {
616
        while (sleepSecs > 0)
617
        {
618
            if (mShutDown)
619
            {
620
                mLogger.fatal("daemonSleepSecs detected termiantion request");
621
                return true;
622
            }
623
 
624
            int sleepTime = sleepSecs;
625
            if (sleepTime > 5)
626
                sleepTime = 5;
627
 
628
            try
629
            {
630
                Thread.sleep(sleepTime * 1000L);
631
            }
632
            catch (InterruptedException e)
633
            {
634
                mLogger.warn("daemonSleepSecs sleep caught InterruptedException");
635
                Thread.currentThread().interrupt();
636
                break;
637
            }
638
            sleepSecs -= sleepTime;
639
        }
640
 
641
        return false;
642
    }
643
 
644
    /**terminates all BuildThreads
645
     */
646
    public void cleanUp()
647
    {
648
        mLogger.warn("cleanUp");
649
 
650
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
651
        {
652
            ThreadIdentifier threadIdentifier = it.next();
653
 
654
            if (threadIdentifier.getThread().isAlive())
655
            {
656
                try
657
                {
658
                    threadIdentifier.getThread().interrupt();
659
                    threadIdentifier.getThread().join();
660
                }
661
                catch( InterruptedException e )
662
                {
663
                    Thread.currentThread().interrupt();
664
                }
665
            }
666
        }
667
 
668
        if ( nagiosChecker != null )
669
        {
670
            nagiosChecker.terminate();
671
        }
672
 
673
        //  If a shutdown has been requested, then exit the program
674
        //  The only thing left running should be a timer task
675
 
676
        if (mShutDown)
677
        {
678
            System.exit(6);
679
        }
680
    }
681
}
682
 
683
class BuildDaemonUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler
684
{
685
    private static final Logger mLogger = Logger.getLogger(BuildDaemonUncaughtExceptionHandler.class);
686
 
687
    //Implements Thread.UncaughtExceptionHandler.uncaughtException()
688
    public void uncaughtException(Thread th, Throwable ex)
689
    {
690
        System.out.println("You crashed thread " + th.getName());
691
        System.out.println("Exception was: " + ex.toString());
692
 
693
        mLogger.fatal("UncaughtException ThreadName: " + th.getName());
694
        mLogger.fatal("UncaughtException was: " + ex.toString());
695
    }
696
}