Subversion Repositories DevTools

Rev

Rev 7077 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
6914 dpurdie 1
package com.erggroup.buildtool.daemon;
2
 
3
import com.erggroup.buildtool.ripple.Package;
4
import com.erggroup.buildtool.ripple.ReleaseConfig;
5
import com.erggroup.buildtool.ripple.ReleaseManager;
6
 
7
import java.io.File;
8
import java.io.IOException;
9
import java.net.ServerSocket;
10
import java.sql.SQLException;
11
import java.util.ArrayList;
12
import java.util.Iterator;
13
import java.util.Map;
14
import java.util.concurrent.ConcurrentLinkedQueue;
15
 
7033 dpurdie 16
import org.slf4j.Logger ;
17
import org.slf4j.LoggerFactory;
6914 dpurdie 18
 
19
/**BuildDaemon sub component and entry point (main BuildDaemon thread)
20
 */
21
public class BuildDaemon
22
{
23
 
24
    /**hostname
25
     * @attribute
26
     */
27
    static String mHostname;
28
 
29
    /**GBE_LOG
30
     * @attribute
31
     */
32
    static String mGbeLog;
33
 
34
    /** mShutDown
35
	 * @attribute
36
	 *  Request to shutdown the build system gracefully
37
	 */
38
	static boolean mShutDown = false;
39
 
40
    /**Logger
41
     * @attribute
42
     */
7033 dpurdie 43
    protected static Logger mLogger =  LoggerFactory.getLogger(BuildDaemon.class);
6914 dpurdie 44
 
45
    /**Collection of ThreadIdentifier objects.
46
     * Using a ConcurrentLinkedQueue because we add and remove items from the collection
47
     * and it is being accessed from multiple threads.
48
     * @attribute
49
     */
50
    private ConcurrentLinkedQueue<ThreadIdentifier> mThreadCollection = new ConcurrentLinkedQueue<ThreadIdentifier>();
51
 
52
    /** Last time we did a poll for new builds
53
     * 
54
     */
55
    private long mLastBuildPoll = 0;
56
 
57
    /** Build Process poll time in seconds
58
     *  Modified on error to slow the poll rate
59
     */
60
    private int mPollTime = 3;
61
 
62
    /**Nagios
63
     * @attribute
64
     */
65
    ServerSocket nagiosSrv;
66
    NagiosThread nagiosChecker;
67
 
68
    /** Local class to assist in reporting Nagios Status
69
     * 
70
     */
71
    public static class NagiosInfo {
72
        int        threadCount = 0;
73
        int        threadAliveCount = 0;
74
        int        masterCount = 0;
75
        int        slaveCount = 0;
76
        ArrayList<String> extendedReason  = new ArrayList<String>();
77
    }
78
 
79
    /**mThreadCollection items
80
     */
81
    private class ThreadIdentifier
82
    {
83
        /**rcon_id associated with the thread
84
         * @attribute
85
         */
86
        private final int mRconId;
87
 
88
        /**thread identifier
89
         * @attribute
90
         */
91
        private final BuildThread mThread;
92
 
93
        /**constructor
94
         */
95
        ThreadIdentifier(int rconId, BuildThread thread)
96
        {
7033 dpurdie 97
            mLogger.debug("ThreadIdentifier {}", rconId);
6914 dpurdie 98
            mRconId = rconId;
99
            mThread = thread;
100
        }
101
 
102
        /**accessor
103
         */
104
        int getRconId()
105
        {
7033 dpurdie 106
            mLogger.info("get_rcon_id returned {}", mRconId);
6914 dpurdie 107
            return mRconId;
108
        }
109
 
110
        /**accessor
111
         */
112
        BuildThread getThread()
113
        {
114
            mLogger.debug("get_thread");
115
            return mThread;
116
        }
117
    }
118
 
119
    /**Exception thrown to indicate an uncorrectable error
120
     */
121
    public class BuildException extends Exception
122
    {
123
        BuildException(String msg)
124
        {
7033 dpurdie 125
            mLogger.error(msg);
6914 dpurdie 126
        }
127
 
128
      private static final long serialVersionUID = 1L;
129
    }
130
 
131
    /**main method for the Build Daemon program
132
     * instantiates a BuildDaemon object
133
     */
134
    public static void main(String[] args)
135
    {
136
        mLogger.debug("main");
137
 
138
        mHostname = testEnvVar("GBE_HOSTNAME");
139
        testEnvVar("ANT_HOME");
140
        testEnvVar("GBE_UNC");
141
        testEnvVar("GBE_DPKG_SSH_PROPERTIES");
142
        mGbeLog = testEnvVar("GBE_LOG");
143
 
144
        File gl = new File( mGbeLog );
145
        if ( !gl.isDirectory() )
146
        {
7033 dpurdie 147
            mLogger.error("main GBE_LOG is not a directory");
6914 dpurdie 148
            System.exit(1);
149
        }
150
 
151
        // Connection information for the database
152
        String connectionString = System.getenv("GBE_RM_LOCATION");
153
        String username = System.getenv("GBE_RM_USERNAME");
154
        String password = System.getenv("GBE_RM_PASSWORD");
155
        boolean showHelp = false;
156
        int consumed = 0;
157
 
158
        for (int optind = 0; optind < args.length; optind += 1 + consumed)
159
        {
160
            consumed = 0;
161
            boolean argsRemains = optind < (args.length - 1);
162
            if (args[optind].equals("-c") && argsRemains )
163
            {
164
                connectionString = args[optind+1];
165
                consumed ++;
166
            }
167
            else if (args[optind].equals("-u") && argsRemains)
168
            {
169
                username = args[optind+1];
170
                consumed ++;
171
            }
172
            else if (args[optind].equals("-p") && argsRemains)
173
            {
174
                password = args[optind+1];
175
                consumed ++;
176
            }
177
            else
178
            {
179
                showHelp = true;
180
            }
181
        }
182
 
183
        if (    showHelp ||
184
                connectionString == null ||
185
                connectionString.length() == 0 ||
186
                username == null ||
187
                username.length() == 0 ||
188
                password == null ||
189
                password.length() == 0)
190
        {
7033 dpurdie 191
            mLogger.error("Usage: java -jar abtdD.jar -c connectionString -u username -p password");
6914 dpurdie 192
            System.exit(1);
193
        }
194
 
195
        BuildDaemon buildDaemon = new BuildDaemon(connectionString, username, password);
196
        buildDaemon.cleanUp();
197
 
198
    }
199
 
200
    /**
201
     * Test a named EnvVar to ensure that it has been set
202
     * Call after the mLogger has been setup
203
     *  @param varName - Name of string to examine
204
     *  @return - String value of the parameter
205
     */
206
    private static String testEnvVar(String varName) {
207
        String envVar = System.getenv(varName);
208
 
209
        if ( envVar == null )
210
        {
7033 dpurdie 211
            mLogger.error("main {} not set", varName);
6914 dpurdie 212
            System.exit(1);
213
        }
214
        return envVar;
215
    }
216
 
217
    /**
     * constructor, implements the sequence diagram spawn thread.
     * Builds a ReleaseManager from the connection details and delegates to
     * {@link #BuildDaemon(ReleaseManager)}, which runs the daemon loop.
     *
     * @param connectionString database connection string
     * @param username         database user; "[release_manager]" is appended to it
     * @param password         database password
     */
    public BuildDaemon(String connectionString, String username, String password)
    {
        this(new ReleaseManager(connectionString, username + "[release_manager]", password));
    }

    /**
     * Runs the build daemon: this constructor does not return until the polling loop ends.
     * <p>
     * Each pass of the loop polls for activated builds and, at most once every five
     * minutes, for changes in the set of build threads required on this host, then
     * sleeps for mPollTime seconds. The loop ends when mShutDown is set, in which case
     * all build threads are notified and cleanUp() is called.
     * <p>
     * A ReleaseManager whose connection string is "unit test spawn thread" selects unit
     * test mode: the Nagios server is not started and the loop body is executed once.
     *
     * @param releaseManager database access object; cloned for every thread spawned
     */
    public BuildDaemon(ReleaseManager releaseManager)
    {
        String utf = null;
        mLogger.warn("BuildDaemon");

        try
        {
            // Flag UTF in progress.
            // Constant-first equals() so a null connection string means "not a unit test"
            // rather than a NullPointerException.
            if ( "unit test spawn thread".equals(releaseManager.mConnectionString) )
            {
                utf = releaseManager.mConnectionString;
            }

            if ( Package.mGenericMachtype == null )
            {
                throw new BuildException("run GBE_MACHTYPE not set");
            }

            if ( Package.mGbeDpkg == null )
            {
                throw new BuildException("run GBE_DPKG not set");
            }

            //  Set the default handler invoked when a thread abruptly terminates due to an
            //  uncaught exception, and no other handler has been defined for that thread.
            //
            Thread.setDefaultUncaughtExceptionHandler(new BuildDaemonUncaughtExceptionHandler());

            //
            //  Start the Nagios Interface Thread
            //    Unless performing a unit test
            //
            if ( utf == null )
            {
                startNagiosServer();
            }

            //  Discover new build daemons to be started on the current host
            //
            while (!mShutDown)
            {
                // lookForBuildRequests() raises this when its query fails
                mPollTime = 3;

                // Poll for slave build requests
                lookForBuildRequests(releaseManager, utf);

                // Poll for new builds every few (5) minutes
                if ( System.currentTimeMillis() - mLastBuildPoll > 5 * 60 * 1000) {
                    lookForNewBuildDaemons(releaseManager, utf);
                    mLastBuildPoll = System.currentTimeMillis();
                }

                //  In UTF mode we only execute the loop once
                if ( utf != null ) {
                    break;
                }

                // Wait mPollTime (normally 3) seconds before polling again
                daemonSleepSecs(mPollTime);
            }

            mLogger.error("BuildDaemon daemon spawning shutdown");
            if (mShutDown)
            {
                notifyAllBuildThreads();
                cleanUp();
            }
        }
        catch( BuildException e )
        {
            // Pass the exception to the logger so the log shows which check failed
            mLogger.error("BuildDaemon caught Exception", e);
        }
    }
298
 
299
    /**
300
     * Used by the Polling Thread to notify slaves of an active build request
301
     * The slaves will 'wait' for notification in order to proceed
302
     * Thus the polling is done once, rather than each slave thread polling individually
303
     * @param rconId
304
     */
305
    public void notifyBuildThread(Integer rconId) {
306
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
307
        {
308
            ThreadIdentifier threadIdentifier = it.next();
309
 
310
            if (threadIdentifier.mThread.isAlive())
311
            {
312
                BuildThread bt = threadIdentifier.mThread;
313
                if (bt.mRconId == rconId ) {
7033 dpurdie 314
                    mLogger.warn("Notify RtagId:{}, bmlId:{}", bt.mRtagId, bt.mRconId );
6914 dpurdie 315
                    synchronized (bt.mActiveBuildMonitor) {
316
                        bt.mActiveBuildMonitor.notifyAll();
317
                    }
318
                }
319
            }
320
        }
321
    }
322
 
323
    /**
324
     * Notify all build threads of some activity
325
     * This will release all threads waiting on the ActiveBuildMonitor
326
     */
327
    public void notifyAllBuildThreads() {
7033 dpurdie 328
        mLogger.error("notifyAllBuildThreads");
6914 dpurdie 329
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
330
        {
331
            ThreadIdentifier threadIdentifier = it.next();
332
 
333
            if (threadIdentifier.mThread.isAlive())
334
            {
335
                BuildThread bt = threadIdentifier.mThread;
336
 
337
                synchronized (bt.mActiveBuildMonitor) {
338
                    bt.mActiveBuildMonitor.notifyAll();
339
                }
340
            }
341
        }
342
    }
343
 
344
    /** Look for changes in the build daemons required
345
     *  Startup new daemons
346
     *  Shutdown ones that are no longer required
347
     * 
348
     *  @param  releaseManager - The release Manager instance
349
     *  @param  utf            - Unit Test information 
350
     */
351
    private void lookForNewBuildDaemons (ReleaseManager releaseManager, String utf) {
7080 dpurdie 352
        mLogger.error("lookForNewBuildDaemons as {}", mHostname );
6914 dpurdie 353
        try
354
        {
355
            // Create a list of all machines that are configured to run on this machine
356
            //
357
            releaseManager.queryReleaseConfig(mHostname);
358
 
359
            //
360
            //  Iterate over all the configured machines
361
            //  Start up new build threads for new machines
362
            //
363
            for (Iterator<ReleaseConfig> it = releaseManager.mReleaseConfigCollection.mReleaseConfig.iterator(); it.hasNext(); )
364
            {
365
                ReleaseConfig rc = it.next();
366
 
367
                if (!isActive(rc.getRconId()))
368
                {
369
                    //
370
                    //  Ensure the Release is configured with exactly one master
371
                    //
372
                    int masterCount = releaseManager.queryMasterCount( rc.getRtagId() );
373
                    if ( masterCount != 1 )
374
                    {
7033 dpurdie 375
                        mLogger.error("BuildDaemon activating. Invalid Masters {} MasterCount: {}", rc.getRtagId(),  masterCount );
6914 dpurdie 376
                        continue;
377
                    }
378
 
7033 dpurdie 379
                    mLogger.warn("BuildDaemon activating {} {} {}",rc.getRtagId(), rc.getRconId(), rc.getDaemonMode());
6914 dpurdie 380
 
381
                    //
382
                    //    Clone the BuildDaemons ReleaseManager thread
383
                    //    Done so that we can perform unit testing by extending the ReleaseManager class
384
                    //    Need to be able to have the Master/Slave threads process through the overridden
385
                    //    methods within the extended ReleaseManager class
386
                    //
387
                    ReleaseManager threadReleaseManager = (ReleaseManager) releaseManager.clone();
388
 
389
                    // spawn and run the BuildThread
390
                    if (rc.getDaemonMode() == 'M')
391
                    {
392
                        MasterThread thread = new MasterThread(rc.getRtagId(), rc.getRconId(), threadReleaseManager, utf);
393
                        ThreadIdentifier threadIdentifier =  new ThreadIdentifier(rc.getRconId(), thread);
394
                        mThreadCollection.add(threadIdentifier);
395
                        thread.start();
396
                    }
397
                    else if (rc.getDaemonMode() == 'S')
398
                    {
399
                        SlaveThread thread = new SlaveThread(rc.getRtagId(), rc.getRconId(), threadReleaseManager, utf);
400
                        ThreadIdentifier threadIdentifier = new ThreadIdentifier(rc.getRconId(), thread);
401
                        mThreadCollection.add(threadIdentifier);
402
                        thread.start();
403
                    }
404
                }
405
            }
406
 
407
            //
408
            //  Clean out terminated threads from the thread collection
409
            //
410
            cleanupTerminatedThreads();
411
 
412
        }
413
        catch (SQLException e)
414
        {
415
            mLogger.warn("BuildDaemon caught SQLException");
416
        }
417
        catch (InterruptedException e)
418
        {
419
            mLogger.warn("BuildDaemon caught InterruptedException");
420
            Thread.currentThread().interrupt();
421
        }
422
        catch (Exception e)
423
        {
424
            mLogger.warn("BuildDaemon caught Exception");
425
        }
426
    }
427
 
428
    /** Look for new build requests for slaves on this machine
429
     *  It is better to have one thread poll every 3 seconds, than have all the slave
430
     *  threads poll every 3 seconds 
431
     * 
432
     * @param releaseManager
433
     * @param utf
434
     */
435
 
436
    private void lookForBuildRequests(ReleaseManager releaseManager, String utf) {
437
        if (utf != null) {
438
            return;
439
        }
440
 
441
        mLogger.debug("Poll Cycle");
442
        try {
443
 
444
            //  Generate a list of all rconIds that are currently active on this machine
445
            //  Will be used to limit the query
446
            //
447
            StringBuilder rconIdList = new StringBuilder();
448
            String joiner = null;
449
 
450
            for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
451
            {
452
                ThreadIdentifier threadIdentifier = it.next();
453
 
454
                if (threadIdentifier.mThread.isAlive())
455
                {
456
                    BuildThread bt = threadIdentifier.mThread;
457
                    if( joiner != null) {
458
                        rconIdList.append(joiner);
459
                    }
460
                    rconIdList.append(bt.mRconId);
461
                    joiner = ",";
462
                }
463
            }
464
 
465
            if (joiner != null) {
466
                //
467
                //  Locate slaves that have an outstanding build request and master that have forced poll requests
468
                //  Notify the threads of this condition
469
                ArrayList<Integer> rv = releaseManager.queryActivatedBuilds(mHostname, rconIdList.toString());
470
                for (Integer rconId : rv) {
7033 dpurdie 471
                    mLogger.warn("Activate:{}", rconId);
6914 dpurdie 472
                    notifyBuildThread(rconId);
473
                }
474
            }
475
 
476
        } catch (Exception e) {
7033 dpurdie 477
            mLogger.error("lookForBuildRequests Exception: {}", e.getMessage());
6914 dpurdie 478
            mPollTime = 30;
479
        }
480
 
481
    }
482
 
483
    /**
484
     * Start up the Nagios server
485
     * @throws BuildException
486
     */
487
    private void startNagiosServer() throws BuildException {
488
        try {
489
            nagiosSrv = new ServerSocket(1111);
490
            nagiosChecker = new NagiosThread(nagiosSrv, this);
491
            nagiosChecker.start();
492
        } catch ( IOException e ) {
493
            throw new BuildException("Nagios port in use");
494
        }
495
    }
496
 
497
    /**
498
     * Clean out terminated threads from the thread collection
499
     * Examine all the build threads and get rid of those that have been terminated
500
     */
501
    private void cleanupTerminatedThreads()
502
    {
503
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
504
        {
505
            ThreadIdentifier threadIdentifier = it.next();
506
 
507
            if (!threadIdentifier.getThread().isAlive())
508
            {
509
                BuildThread bt = threadIdentifier.getThread();
7033 dpurdie 510
                mLogger.warn("BuildDaemon removal {} {} {}", bt.mRtagId, bt.mRconId, bt.getMode());
6914 dpurdie 511
                it.remove();
512
            }
513
        }
514
    }
515
 
516
    /**calls isAlive on the Thread object associated with the rcon_id
517
     */
518
    public boolean isActive(final int rcon_id)
519
    {
7033 dpurdie 520
        mLogger.debug("isActive {}", rcon_id);
6914 dpurdie 521
        boolean retVal = false;
522
        boolean found = false;
523
 
524
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
525
        {
526
            ThreadIdentifier threadIdentifier = it.next();
527
 
528
            if (threadIdentifier.getRconId() == rcon_id)
529
            {
530
                found = true;
531
                if (threadIdentifier.getThread().isAlive())
532
                {
533
                    retVal = true;
534
                    break;
535
                }
536
                else
537
                {
7033 dpurdie 538
                    mLogger.warn("isActive found dead thread {}", rcon_id );
6914 dpurdie 539
                }
540
            }
541
        }
542
 
543
        if ( !found )
544
        {
7033 dpurdie 545
            mLogger.warn("isActive thread not found {}", rcon_id);
6914 dpurdie 546
        }
547
 
7033 dpurdie 548
        mLogger.debug("isActive returned {}", retVal);
6914 dpurdie 549
        return retVal;
550
    }
551
 
552
    /**
553
     *  Nagios interface
554
     *          Must have one active thread
555
     *          Examine all threads - for logging
556
     */
557
    void checkThreads( NagiosInfo nagInfo)
558
    {
559
        mLogger.info("checkThreads called");
560
        if (!mThreadCollection.isEmpty())
561
        {
562
            for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext();) 
563
            {
564
                ThreadIdentifier threadIdentifier = it.next();
565
 
566
                nagInfo.threadCount ++;
567
                if ( threadIdentifier.getThread().isAlive()) {
568
                    nagInfo.threadAliveCount ++;
569
                }
570
 
571
                threadIdentifier.getThread().checkThread(nagInfo);
572
            }
573
        }
574
        else
575
        {
576
            nagInfo.extendedReason.add("No Build Threads configured");
577
        }
578
    }
579
 
580
    /**
581
     *  Nagios interface
582
     *      Provide extended information on all threads
583
     * @param estatus 
584
     */
585
    public void extendedStatus(Map<String, Object> estatus)
586
    {
587
        NagiosInfo nagInfo = new NagiosInfo();
588
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext();) 
589
        {
590
            ThreadIdentifier threadIdentifier = it.next();
591
            threadIdentifier.getThread().extendedStatus(nagInfo, estatus);
592
        }
593
    }
594
 
595
 
596
    /**
597
     * daemonSleepSecs
598
     * Sleep for a specified number of seconds allowing for
599
     * a termination request
600
     * 
601
     * The function will sleep for 5 seconds at a time, 
602
     * then check for a termination request
603
     * 
604
     * @param sleepSecs The number of seconds to sleep
605
     * 
606
     * @return True :    Termination requested
607
     *         
608
     *         False:    No Termination requested
609
     */
610
    public static boolean daemonSleepSecs(int sleepSecs)
611
    {
612
        while (sleepSecs > 0)
613
        {
614
            if (mShutDown)
615
            {
7033 dpurdie 616
                mLogger.error("daemonSleepSecs detected termiantion request");
6914 dpurdie 617
                return true;
618
            }
619
 
620
            int sleepTime = sleepSecs;
621
            if (sleepTime > 5)
622
                sleepTime = 5;
623
 
624
            try
625
            {
626
                Thread.sleep(sleepTime * 1000L);
627
            }
628
            catch (InterruptedException e)
629
            {
630
                mLogger.warn("daemonSleepSecs sleep caught InterruptedException");
631
                Thread.currentThread().interrupt();
632
                break;
633
            }
634
            sleepSecs -= sleepTime;
635
        }
636
 
637
        return false;
638
    }
639
 
640
    /**terminates all BuildThreads
641
     */
642
    public void cleanUp()
643
    {
644
        mLogger.warn("cleanUp");
645
 
646
        for (Iterator<ThreadIdentifier> it = mThreadCollection.iterator(); it.hasNext(); )
647
        {
648
            ThreadIdentifier threadIdentifier = it.next();
649
 
650
            if (threadIdentifier.getThread().isAlive())
651
            {
652
                try
653
                {
654
                    threadIdentifier.getThread().interrupt();
655
                    threadIdentifier.getThread().join();
656
                }
657
                catch( InterruptedException e )
658
                {
659
                    Thread.currentThread().interrupt();
660
                }
661
            }
662
        }
663
 
664
        if ( nagiosChecker != null )
665
        {
666
            nagiosChecker.terminate();
667
        }
668
 
669
        //  If a shutdown has been requested, then exit the program
670
        //  The only thing left running should be a timer task
671
 
672
        if (mShutDown)
673
        {
674
            System.exit(6);
675
        }
676
    }
677
}
678
 
679
class BuildDaemonUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler
680
{
7033 dpurdie 681
    private static final Logger mLogger = LoggerFactory.getLogger(BuildDaemonUncaughtExceptionHandler.class);
6914 dpurdie 682
 
683
    //Implements Thread.UncaughtExceptionHandler.uncaughtException()
684
    public void uncaughtException(Thread th, Throwable ex)
685
    {
686
        System.out.println("You crashed thread " + th.getName());
687
        System.out.println("Exception was: " + ex.toString());
688
 
7033 dpurdie 689
        mLogger.error("UncaughtException ThreadName: {}", th.getName());
690
        mLogger.error("UncaughtException was: {}", ex);
6914 dpurdie 691
    }
692
}